From 00117a07efbbe8482add12262a179326541467de Mon Sep 17 00:00:00 2001
From: Trung Ngo <codem01@gmail.com>
Date: Sat, 8 Oct 2022 05:33:21 -0500
Subject: check specifically for skipped

---
 modules/sd_samplers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/sd_samplers.py')

diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index df17e93c..13a8b322 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -106,7 +106,7 @@ def extended_tdqm(sequence, *args, desc=None, **kwargs):
     seq = sequence if cmd_opts.disable_console_progressbars else tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs)
 
     for x in seq:
-        if state.interrupted:
+        if state.interrupted or state.skipped:
             break
 
         yield x
@@ -254,7 +254,7 @@ def extended_trange(sampler, count, *args, **kwargs):
     seq = range(count) if cmd_opts.disable_console_progressbars else tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs)
 
     for x in seq:
-        if state.interrupted:
+        if state.interrupted or state.skipped:
             break
 
         if sampler.stop_at is not None and x > sampler.stop_at:
-- 
cgit v1.2.3


From 77f4237d1c3af1756e7dab2699e3dcebad5619d6 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 8 Oct 2022 15:25:59 +0300
Subject: fix bugs related to variable prompt lengths

---
 modules/sd_hijack.py   | 14 +++++++++-----
 modules/sd_samplers.py | 35 ++++++++++++++++++++++++++++-------
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'modules/sd_samplers.py')

diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 2c1332c9..7e7fde0f 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -89,7 +89,6 @@ class StableDiffusionModelHijack:
             layer.padding_mode = 'circular' if enable else 'zeros'
 
     def tokenize(self, text):
-        max_length = opts.max_prompt_tokens - 2
         _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text])
         return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count)
 
@@ -174,7 +173,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
             if line in cache:
                 remade_tokens, fixes, multipliers = cache[line]
             else:
-                remade_tokens, fixes, multipliers, token_count = self.tokenize_line(line, used_custom_terms, hijack_comments)
+                remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments)
+                token_count = max(current_token_count, token_count)
 
                 cache[line] = (remade_tokens, fixes, multipliers)
 
@@ -265,15 +265,19 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
         if len(used_custom_terms) > 0:
             self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
 
-        position_ids_array = [min(x, 75) for x in range(len(remade_batch_tokens[0])-1)] + [76]
+        target_token_count = get_target_prompt_token_count(token_count) + 2
+
+        position_ids_array = [min(x, 75) for x in range(target_token_count-1)] + [76]
         position_ids = torch.asarray(position_ids_array, device=devices.device).expand((1, -1))
 
-        tokens = torch.asarray(remade_batch_tokens).to(device)
+        remade_batch_tokens_of_same_length = [x + [self.wrapped.tokenizer.eos_token_id] * (target_token_count - len(x)) for x in remade_batch_tokens]
+        tokens = torch.asarray(remade_batch_tokens_of_same_length).to(device)
         outputs = self.wrapped.transformer(input_ids=tokens, position_ids=position_ids)
         z = outputs.last_hidden_state
 
         # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
-        batch_multipliers = torch.asarray(batch_multipliers).to(device)
+        batch_multipliers_of_same_length = [x + [1.0] * (target_token_count - len(x)) for x in batch_multipliers]
+        batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(device)
         original_mean = z.mean()
         z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
         new_mean = z.mean()
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 13a8b322..eade0dbb 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -142,6 +142,16 @@ class VanillaStableDiffusionSampler:
         assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers'
         cond = tensor
 
+        # for DDIM, shapes must match, we can't just process cond and uncond independently;
+        # filling unconditional_conditioning with repeats of the last vector to match length is
+        # not 100% correct but should work well enough
+        if unconditional_conditioning.shape[1] < cond.shape[1]:
+            last_vector = unconditional_conditioning[:, -1:]
+            last_vector_repeated = last_vector.repeat([1, cond.shape[1] - unconditional_conditioning.shape[1], 1])
+            unconditional_conditioning = torch.hstack([unconditional_conditioning, last_vector_repeated])
+        elif unconditional_conditioning.shape[1] > cond.shape[1]:
+            unconditional_conditioning = unconditional_conditioning[:, :cond.shape[1]]
+
         if self.mask is not None:
             img_orig = self.sampler.model.q_sample(self.init_latent, ts)
             x_dec = img_orig * self.mask + self.nmask * x_dec
@@ -221,18 +231,29 @@ class CFGDenoiser(torch.nn.Module):
 
         x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x])
         sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma])
-        cond_in = torch.cat([tensor, uncond])
 
-        if shared.batch_cond_uncond:
-            x_out = self.inner_model(x_in, sigma_in, cond=cond_in)
+        if tensor.shape[1] == uncond.shape[1]:
+            cond_in = torch.cat([tensor, uncond])
+
+            if shared.batch_cond_uncond:
+                x_out = self.inner_model(x_in, sigma_in, cond=cond_in)
+            else:
+                x_out = torch.zeros_like(x_in)
+                for batch_offset in range(0, x_out.shape[0], batch_size):
+                    a = batch_offset
+                    b = a + batch_size
+                    x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b])
         else:
             x_out = torch.zeros_like(x_in)
-            for batch_offset in range(0, x_out.shape[0], batch_size):
+            batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size
+            for batch_offset in range(0, tensor.shape[0], batch_size):
                 a = batch_offset
-                b = a + batch_size
-                x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b])
+                b = min(a + batch_size, tensor.shape[0])
+                x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=tensor[a:b])
+
+            x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=uncond)
 
-        denoised_uncond = x_out[-batch_size:]
+        denoised_uncond = x_out[-uncond.shape[0]:]
         denoised = torch.clone(denoised_uncond)
 
         for i, conds in enumerate(conds_list):
-- 
cgit v1.2.3


From 432782163ae53e605470bcefc9a6f796c4556912 Mon Sep 17 00:00:00 2001
From: Aidan Holland <thehappydinoa@gmail.com>
Date: Sat, 8 Oct 2022 15:12:24 -0400
Subject: chore: Fix typos

---
 README.md                    | 2 +-
 javascript/imageviewer.js    | 2 +-
 modules/interrogate.py       | 4 ++--
 modules/processing.py        | 2 +-
 modules/scunet_model_arch.py | 4 ++--
 modules/sd_models.py         | 4 ++--
 modules/sd_samplers.py       | 4 ++--
 modules/shared.py            | 6 +++---
 modules/swinir_model_arch.py | 2 +-
 modules/ui.py                | 4 ++--
 10 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'modules/sd_samplers.py')

diff --git a/README.md b/README.md
index ef9b5e31..63dd0c18 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
 - Sampling method selection
 - Interrupt processing at any time
 - 4GB video card support (also reports of 2GB working)
-- Correct seeds for batches 
+- Correct seeds for batches
 - Prompt length validation
      - get length of prompt in tokens as you type
      - get a warning after generation if some text was truncated
diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js
index 4c0e8f4b..6a00c0da 100644
--- a/javascript/imageviewer.js
+++ b/javascript/imageviewer.js
@@ -95,7 +95,7 @@ function showGalleryImage(){
 
                     e.addEventListener('click', function (evt) {
                         if(!opts.js_modal_lightbox) return;
-                        modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initialy_zoomed)
+                        modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initially_zoomed)
                         showModal(evt)
                     },true);
                 }
diff --git a/modules/interrogate.py b/modules/interrogate.py
index eed87144..635e266e 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -140,11 +140,11 @@ class InterrogateModels:
 
             res = caption
 
-            cilp_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
+            clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
 
             precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext
             with torch.no_grad(), precision_scope("cuda"):
-                image_features = self.clip_model.encode_image(cilp_image).type(self.dtype)
+                image_features = self.clip_model.encode_image(clip_image).type(self.dtype)
 
                 image_features /= image_features.norm(dim=-1, keepdim=True)
 
diff --git a/modules/processing.py b/modules/processing.py
index 515fc91a..31220881 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -386,7 +386,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
             if state.interrupted or state.skipped:
 
-                # if we are interruped, sample returns just noise
+                # if we are interrupted, sample returns just noise
                 # use the image collected previously in sampler loop
                 samples_ddim = shared.state.current_latent
 
diff --git a/modules/scunet_model_arch.py b/modules/scunet_model_arch.py
index 972a2639..43ca8d36 100644
--- a/modules/scunet_model_arch.py
+++ b/modules/scunet_model_arch.py
@@ -40,7 +40,7 @@ class WMSA(nn.Module):
         Returns:
             attn_mask: should be (1 1 w p p),
         """
-        # supporting sqaure.
+        # supporting square.
         attn_mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device)
         if self.type == 'W':
             return attn_mask
@@ -65,7 +65,7 @@ class WMSA(nn.Module):
         x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size)
         h_windows = x.size(1)
         w_windows = x.size(2)
-        # sqaure validation
+        # square validation
         # assert h_windows == w_windows
 
         x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 9409d070..a09866ce 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -147,7 +147,7 @@ def load_model_weights(model, checkpoint_file, sd_model_hash):
         model.first_stage_model.load_state_dict(vae_dict)
 
     model.sd_model_hash = sd_model_hash
-    model.sd_model_checkpint = checkpoint_file
+    model.sd_model_checkpoint = checkpoint_file
 
 
 def load_model():
@@ -175,7 +175,7 @@ def reload_model_weights(sd_model, info=None):
     from modules import lowvram, devices, sd_hijack
     checkpoint_info = info or select_checkpoint()
 
-    if sd_model.sd_model_checkpint == checkpoint_info.filename:
+    if sd_model.sd_model_checkpoint == checkpoint_info.filename:
         return
 
     if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index eade0dbb..6e743f7e 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -181,7 +181,7 @@ class VanillaStableDiffusionSampler:
 
         self.initialize(p)
 
-        # existing code fails with cetain step counts, like 9
+        # existing code fails with certain step counts, like 9
         try:
             self.sampler.make_schedule(ddim_num_steps=steps,  ddim_eta=self.eta, ddim_discretize=p.ddim_discretize, verbose=False)
         except Exception:
@@ -204,7 +204,7 @@ class VanillaStableDiffusionSampler:
 
         steps = steps or p.steps
 
-        # existing code fails with cetin step counts, like 9
+        # existing code fails with certain step counts, like 9
         try:
             samples_ddim, _ = self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)
         except Exception:
diff --git a/modules/shared.py b/modules/shared.py
index af8dc744..2dc092d6 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -141,9 +141,9 @@ class OptionInfo:
         self.section = None
 
 
-def options_section(section_identifer, options_dict):
+def options_section(section_identifier, options_dict):
     for k, v in options_dict.items():
-        v.section = section_identifer
+        v.section = section_identifier
 
     return options_dict
 
@@ -246,7 +246,7 @@ options_templates.update(options_section(('ui', "User interface"), {
     "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"),
     "font": OptionInfo("", "Font for image grids that have text"),
     "js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
-    "js_modal_lightbox_initialy_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
+    "js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
     "show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
 }))
 
diff --git a/modules/swinir_model_arch.py b/modules/swinir_model_arch.py
index 461fb354..863f42db 100644
--- a/modules/swinir_model_arch.py
+++ b/modules/swinir_model_arch.py
@@ -166,7 +166,7 @@ class SwinTransformerBlock(nn.Module):
 
     Args:
         dim (int): Number of input channels.
-        input_resolution (tuple[int]): Input resulotion.
+        input_resolution (tuple[int]): Input resolution.
         num_heads (int): Number of attention heads.
         window_size (int): Window size.
         shift_size (int): Shift size for SW-MSA.
diff --git a/modules/ui.py b/modules/ui.py
index b09359aa..b51af121 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -38,7 +38,7 @@ from modules import prompt_parser
 from modules.images import save_image
 import modules.textual_inversion.ui
 
-# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the bowser will not show any UI
+# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI
 mimetypes.init()
 mimetypes.add_type('application/javascript', '.js')
 
@@ -102,7 +102,7 @@ def save_files(js_data, images, index):
     import csv    
     filenames = []
 
-    #quick dictionary to class object conversion. Its neccesary due apply_filename_pattern requiring it
+    #quick dictionary to class object conversion. Its necessary due apply_filename_pattern requiring it
     class MyObject:
         def __init__(self, d=None):
             if d is not None:
-- 
cgit v1.2.3


From 7349088d32b080f64058b6e5de5f0380a71ecd09 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 10 Oct 2022 16:11:14 +0300
Subject: --no-half-vae

---
 modules/devices.py     |  6 +++++-
 modules/processing.py  | 11 +++++++++--
 modules/sd_models.py   |  3 +++
 modules/sd_samplers.py |  4 ++--
 modules/shared.py      |  1 +
 5 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'modules/sd_samplers.py')

diff --git a/modules/devices.py b/modules/devices.py
index 0158b11f..03ef58f1 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -36,6 +36,7 @@ errors.run(enable_tf32, "Enabling TF32")
 
 device = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device()
 dtype = torch.float16
+dtype_vae = torch.float16
 
 def randn(seed, shape):
     # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
@@ -59,9 +60,12 @@ def randn_without_seed(shape):
     return torch.randn(shape, device=device)
 
 
-def autocast():
+def autocast(disable=False):
     from modules import shared
 
+    if disable:
+        return contextlib.nullcontext()
+
     if dtype == torch.float32 or shared.cmd_opts.precision == "full":
         return contextlib.nullcontext()
 
diff --git a/modules/processing.py b/modules/processing.py
index 94d2dd62..ec8651ae 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -259,6 +259,13 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     return x
 
 
+def decode_first_stage(model, x):
+    with devices.autocast(disable=x.dtype == devices.dtype_vae):
+        x = model.decode_first_stage(x)
+
+    return x
+
+
 def get_fixed_seed(seed):
     if seed is None or seed == '' or seed == -1:
         return int(random.randrange(4294967294))
@@ -400,7 +407,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
             samples_ddim = samples_ddim.to(devices.dtype)
 
-            x_samples_ddim = p.sd_model.decode_first_stage(samples_ddim)
+            x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim)
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
 
             del samples_ddim
@@ -533,7 +540,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         if self.scale_latent:
             samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
         else:
-            decoded_samples = self.sd_model.decode_first_stage(samples)
+            decoded_samples = decode_first_stage(self.sd_model, samples)
 
             if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None":
                 decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear")
diff --git a/modules/sd_models.py b/modules/sd_models.py
index e63d3c29..2cdcd84f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -149,6 +149,7 @@ def load_model_weights(model, checkpoint_info):
         model.half()
 
     devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16
+    devices.dtype_vae = torch.float32 if shared.cmd_opts.no_half or shared.cmd_opts.no_half_vae else torch.float16
 
     vae_file = os.path.splitext(checkpoint_file)[0] + ".vae.pt"
     if os.path.exists(vae_file):
@@ -158,6 +159,8 @@ def load_model_weights(model, checkpoint_info):
 
         model.first_stage_model.load_state_dict(vae_dict)
 
+    model.first_stage_model.to(devices.dtype_vae)
+
     model.sd_model_hash = sd_model_hash
     model.sd_model_checkpoint = checkpoint_file
     model.sd_checkpoint_info = checkpoint_info
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 6e743f7e..d168b938 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -7,7 +7,7 @@ import inspect
 import k_diffusion.sampling
 import ldm.models.diffusion.ddim
 import ldm.models.diffusion.plms
-from modules import prompt_parser
+from modules import prompt_parser, devices, processing
 
 from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
@@ -83,7 +83,7 @@ def setup_img2img_steps(p, steps=None):
 
 
 def sample_to_image(samples):
-    x_sample = shared.sd_model.decode_first_stage(samples[0:1].type(shared.sd_model.dtype))[0]
+    x_sample = processing.decode_first_stage(shared.sd_model, samples[0:1])[0]
     x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0)
     x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
     x_sample = x_sample.astype(np.uint8)
diff --git a/modules/shared.py b/modules/shared.py
index 1995a99a..5dfc344c 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -25,6 +25,7 @@ parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to director
 parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))
 parser.add_argument("--gfpgan-model", type=str, help="GFPGAN model file name", default=None)
 parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats")
+parser.add_argument("--no-half-vae", action='store_true', help="do not switch the VAE model to 16-bit floats")
 parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)")
 parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI")
 parser.add_argument("--embeddings-dir", type=str, default=os.path.join(script_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)")
-- 
cgit v1.2.3