From 00117a07efbbe8482add12262a179326541467de Mon Sep 17 00:00:00 2001 From: Trung Ngo Date: Sat, 8 Oct 2022 05:33:21 -0500 Subject: check specifically for skipped --- modules/sd_samplers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index df17e93c..13a8b322 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -106,7 +106,7 @@ def extended_tdqm(sequence, *args, desc=None, **kwargs): seq = sequence if cmd_opts.disable_console_progressbars else tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs) for x in seq: - if state.interrupted: + if state.interrupted or state.skipped: break yield x @@ -254,7 +254,7 @@ def extended_trange(sampler, count, *args, **kwargs): seq = range(count) if cmd_opts.disable_console_progressbars else tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs) for x in seq: - if state.interrupted: + if state.interrupted or state.skipped: break if sampler.stop_at is not None and x > sampler.stop_at: -- cgit v1.2.3 From 77f4237d1c3af1756e7dab2699e3dcebad5619d6 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 8 Oct 2022 15:25:59 +0300 Subject: fix bugs related to variable prompt lengths --- modules/sd_hijack.py | 14 +++++++++----- modules/sd_samplers.py | 35 ++++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 2c1332c9..7e7fde0f 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -89,7 +89,6 @@ class StableDiffusionModelHijack: layer.padding_mode = 'circular' if enable else 'zeros' def tokenize(self, text): - max_length = opts.max_prompt_tokens - 2 _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text]) return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count) @@ -174,7 +173,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if line in cache: remade_tokens, fixes, multipliers = cache[line] else: - remade_tokens, fixes, multipliers, token_count = self.tokenize_line(line, used_custom_terms, hijack_comments) + remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments) + token_count = max(current_token_count, token_count) cache[line] = (remade_tokens, fixes, multipliers) @@ -265,15 +265,19 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if len(used_custom_terms) > 0: self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms])) - position_ids_array = [min(x, 75) for x in range(len(remade_batch_tokens[0])-1)] + [76] + target_token_count = get_target_prompt_token_count(token_count) + 2 + + position_ids_array = [min(x, 75) for x in range(target_token_count-1)] + [76] position_ids = torch.asarray(position_ids_array, device=devices.device).expand((1, -1)) - tokens = torch.asarray(remade_batch_tokens).to(device) + remade_batch_tokens_of_same_length = [x + [self.wrapped.tokenizer.eos_token_id] * (target_token_count - len(x)) for x in remade_batch_tokens] + tokens = torch.asarray(remade_batch_tokens_of_same_length).to(device) outputs = self.wrapped.transformer(input_ids=tokens, position_ids=position_ids) z = outputs.last_hidden_state # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise - batch_multipliers = torch.asarray(batch_multipliers).to(device) + batch_multipliers_of_same_length = [x + [1.0] * (target_token_count - len(x)) for x in batch_multipliers] + batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(device) original_mean = z.mean() z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape) new_mean = z.mean() diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 13a8b322..eade0dbb 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -142,6 +142,16 @@ class VanillaStableDiffusionSampler: assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers' cond = tensor + # for DDIM, shapes must match, we can't just process cond and uncond independently; + # filling unconditional_conditioning with repeats of the last vector to match length is + # not 100% correct but should work well enough + if unconditional_conditioning.shape[1] < cond.shape[1]: + last_vector = unconditional_conditioning[:, -1:] + last_vector_repeated = last_vector.repeat([1, cond.shape[1] - unconditional_conditioning.shape[1], 1]) + unconditional_conditioning = torch.hstack([unconditional_conditioning, last_vector_repeated]) + elif unconditional_conditioning.shape[1] > cond.shape[1]: + unconditional_conditioning = unconditional_conditioning[:, :cond.shape[1]] + if self.mask is not None: img_orig = self.sampler.model.q_sample(self.init_latent, ts) x_dec = img_orig * self.mask + self.nmask * x_dec @@ -221,18 +231,29 @@ class CFGDenoiser(torch.nn.Module): x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) - cond_in = torch.cat([tensor, uncond]) - if shared.batch_cond_uncond: - x_out = self.inner_model(x_in, sigma_in, cond=cond_in) + if tensor.shape[1] == uncond.shape[1]: + cond_in = torch.cat([tensor, uncond]) + + if shared.batch_cond_uncond: + x_out = self.inner_model(x_in, sigma_in, cond=cond_in) + else: + x_out = torch.zeros_like(x_in) + for batch_offset in range(0, x_out.shape[0], batch_size): + a = batch_offset + b = a + batch_size + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b]) else: x_out = torch.zeros_like(x_in) - for batch_offset in range(0, x_out.shape[0], batch_size): + batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size + for batch_offset in range(0, tensor.shape[0], batch_size): a = batch_offset - b = a + batch_size - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b]) + b = min(a + batch_size, tensor.shape[0]) + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=tensor[a:b]) + + x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=uncond) - denoised_uncond = x_out[-batch_size:] + denoised_uncond = x_out[-uncond.shape[0]:] denoised = torch.clone(denoised_uncond) for i, conds in enumerate(conds_list): -- cgit v1.2.3 From 432782163ae53e605470bcefc9a6f796c4556912 Mon Sep 17 00:00:00 2001 From: Aidan Holland Date: Sat, 8 Oct 2022 15:12:24 -0400 Subject: chore: Fix typos --- README.md | 2 +- javascript/imageviewer.js | 2 +- modules/interrogate.py | 4 ++-- modules/processing.py | 2 +- modules/scunet_model_arch.py | 4 ++-- modules/sd_models.py | 4 ++-- modules/sd_samplers.py | 4 ++-- modules/shared.py | 6 +++--- modules/swinir_model_arch.py | 2 +- modules/ui.py | 4 ++-- 10 files changed, 17 insertions(+), 17 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/README.md b/README.md index ef9b5e31..63dd0c18 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - Sampling method selection - Interrupt processing at any time - 4GB video card support (also reports of 2GB working) -- Correct seeds for batches +- Correct seeds for batches - Prompt length validation - get length of prompt in tokens as you type - get a warning after generation if some text was truncated diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js index 4c0e8f4b..6a00c0da 100644 --- a/javascript/imageviewer.js +++ b/javascript/imageviewer.js @@ -95,7 +95,7 @@ function showGalleryImage(){ e.addEventListener('click', function (evt) { if(!opts.js_modal_lightbox) return; - modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initialy_zoomed) + modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initially_zoomed) showModal(evt) },true); } diff --git a/modules/interrogate.py b/modules/interrogate.py index eed87144..635e266e 100644 --- a/modules/interrogate.py +++ b/modules/interrogate.py @@ -140,11 +140,11 @@ class InterrogateModels: res = caption - cilp_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device) + clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device) precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext with torch.no_grad(), precision_scope("cuda"): - image_features = self.clip_model.encode_image(cilp_image).type(self.dtype) + image_features = self.clip_model.encode_image(clip_image).type(self.dtype) image_features /= image_features.norm(dim=-1, keepdim=True) diff --git a/modules/processing.py b/modules/processing.py index 515fc91a..31220881 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -386,7 +386,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: if state.interrupted or state.skipped: - # if we are interruped, sample returns just noise + # if we are interrupted, sample returns just noise # use the image collected previously in sampler loop samples_ddim = shared.state.current_latent diff --git a/modules/scunet_model_arch.py b/modules/scunet_model_arch.py index 972a2639..43ca8d36 100644 --- a/modules/scunet_model_arch.py +++ b/modules/scunet_model_arch.py @@ -40,7 +40,7 @@ class WMSA(nn.Module): Returns: attn_mask: should be (1 1 w p p), """ - # supporting sqaure. + # supporting square. attn_mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device) if self.type == 'W': return attn_mask @@ -65,7 +65,7 @@ class WMSA(nn.Module): x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size) h_windows = x.size(1) w_windows = x.size(2) - # sqaure validation + # square validation # assert h_windows == w_windows x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size) diff --git a/modules/sd_models.py b/modules/sd_models.py index 9409d070..a09866ce 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -147,7 +147,7 @@ def load_model_weights(model, checkpoint_file, sd_model_hash): model.first_stage_model.load_state_dict(vae_dict) model.sd_model_hash = sd_model_hash - model.sd_model_checkpint = checkpoint_file + model.sd_model_checkpoint = checkpoint_file def load_model(): @@ -175,7 +175,7 @@ def reload_model_weights(sd_model, info=None): from modules import lowvram, devices, sd_hijack checkpoint_info = info or select_checkpoint() - if sd_model.sd_model_checkpint == checkpoint_info.filename: + if sd_model.sd_model_checkpoint == checkpoint_info.filename: return if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index eade0dbb..6e743f7e 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -181,7 +181,7 @@ class VanillaStableDiffusionSampler: self.initialize(p) - # existing code fails with cetain step counts, like 9 + # existing code fails with certain step counts, like 9 try: self.sampler.make_schedule(ddim_num_steps=steps, ddim_eta=self.eta, ddim_discretize=p.ddim_discretize, verbose=False) except Exception: @@ -204,7 +204,7 @@ class VanillaStableDiffusionSampler: steps = steps or p.steps - # existing code fails with cetin step counts, like 9 + # existing code fails with certain step counts, like 9 try: samples_ddim, _ = self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta) except Exception: diff --git a/modules/shared.py b/modules/shared.py index af8dc744..2dc092d6 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -141,9 +141,9 @@ class OptionInfo: self.section = None -def options_section(section_identifer, options_dict): +def options_section(section_identifier, options_dict): for k, v in options_dict.items(): - v.section = section_identifer + v.section = section_identifier return options_dict @@ -246,7 +246,7 @@ options_templates.update(options_section(('ui', "User interface"), { "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"), "font": OptionInfo("", "Font for image grids that have text"), "js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"), - "js_modal_lightbox_initialy_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"), + "js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"), "show_progress_in_title": OptionInfo(True, "Show generation progress in window title."), })) diff --git a/modules/swinir_model_arch.py b/modules/swinir_model_arch.py index 461fb354..863f42db 100644 --- a/modules/swinir_model_arch.py +++ b/modules/swinir_model_arch.py @@ -166,7 +166,7 @@ class SwinTransformerBlock(nn.Module): Args: dim (int): Number of input channels. - input_resolution (tuple[int]): Input resulotion. + input_resolution (tuple[int]): Input resolution. num_heads (int): Number of attention heads. window_size (int): Window size. shift_size (int): Shift size for SW-MSA. diff --git a/modules/ui.py b/modules/ui.py index b09359aa..b51af121 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -38,7 +38,7 @@ from modules import prompt_parser from modules.images import save_image import modules.textual_inversion.ui -# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the bowser will not show any UI +# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() mimetypes.add_type('application/javascript', '.js') @@ -102,7 +102,7 @@ def save_files(js_data, images, index): import csv filenames = [] - #quick dictionary to class object conversion. Its neccesary due apply_filename_pattern requiring it + #quick dictionary to class object conversion. Its necessary due apply_filename_pattern requiring it class MyObject: def __init__(self, d=None): if d is not None: -- cgit v1.2.3 From 7349088d32b080f64058b6e5de5f0380a71ecd09 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Mon, 10 Oct 2022 16:11:14 +0300 Subject: --no-half-vae --- modules/devices.py | 6 +++++- modules/processing.py | 11 +++++++++-- modules/sd_models.py | 3 +++ modules/sd_samplers.py | 4 ++-- modules/shared.py | 1 + 5 files changed, 20 insertions(+), 5 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/devices.py b/modules/devices.py index 0158b11f..03ef58f1 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -36,6 +36,7 @@ errors.run(enable_tf32, "Enabling TF32") device = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device() dtype = torch.float16 +dtype_vae = torch.float16 def randn(seed, shape): # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used. @@ -59,9 +60,12 @@ def randn_without_seed(shape): return torch.randn(shape, device=device) -def autocast(): +def autocast(disable=False): from modules import shared + if disable: + return contextlib.nullcontext() + if dtype == torch.float32 or shared.cmd_opts.precision == "full": return contextlib.nullcontext() diff --git a/modules/processing.py b/modules/processing.py index 94d2dd62..ec8651ae 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -259,6 +259,13 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see return x +def decode_first_stage(model, x): + with devices.autocast(disable=x.dtype == devices.dtype_vae): + x = model.decode_first_stage(x) + + return x + + def get_fixed_seed(seed): if seed is None or seed == '' or seed == -1: return int(random.randrange(4294967294)) @@ -400,7 +407,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: samples_ddim = samples_ddim.to(devices.dtype) - x_samples_ddim = p.sd_model.decode_first_stage(samples_ddim) + x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim) x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) del samples_ddim @@ -533,7 +540,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): if self.scale_latent: samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") else: - decoded_samples = self.sd_model.decode_first_stage(samples) + decoded_samples = decode_first_stage(self.sd_model, samples) if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None": decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear") diff --git a/modules/sd_models.py b/modules/sd_models.py index e63d3c29..2cdcd84f 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -149,6 +149,7 @@ def load_model_weights(model, checkpoint_info): model.half() devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16 + devices.dtype_vae = torch.float32 if shared.cmd_opts.no_half or shared.cmd_opts.no_half_vae else torch.float16 vae_file = os.path.splitext(checkpoint_file)[0] + ".vae.pt" if os.path.exists(vae_file): @@ -158,6 +159,8 @@ def load_model_weights(model, checkpoint_info): model.first_stage_model.load_state_dict(vae_dict) + model.first_stage_model.to(devices.dtype_vae) + model.sd_model_hash = sd_model_hash model.sd_model_checkpoint = checkpoint_file model.sd_checkpoint_info = checkpoint_info diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 6e743f7e..d168b938 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -7,7 +7,7 @@ import inspect import k_diffusion.sampling import ldm.models.diffusion.ddim import ldm.models.diffusion.plms -from modules import prompt_parser +from modules import prompt_parser, devices, processing from modules.shared import opts, cmd_opts, state import modules.shared as shared @@ -83,7 +83,7 @@ def setup_img2img_steps(p, steps=None): def sample_to_image(samples): - x_sample = shared.sd_model.decode_first_stage(samples[0:1].type(shared.sd_model.dtype))[0] + x_sample = processing.decode_first_stage(shared.sd_model, samples[0:1])[0] x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) x_sample = x_sample.astype(np.uint8) diff --git a/modules/shared.py b/modules/shared.py index 1995a99a..5dfc344c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -25,6 +25,7 @@ parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to director parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN')) parser.add_argument("--gfpgan-model", type=str, help="GFPGAN model file name", default=None) parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats") +parser.add_argument("--no-half-vae", action='store_true', help="do not switch the VAE model to 16-bit floats") parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)") parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI") parser.add_argument("--embeddings-dir", type=str, default=os.path.join(script_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)") -- cgit v1.2.3