From 8e7097d06a6a261580d34375c9d2a9e4ffc63ffa Mon Sep 17 00:00:00 2001
From: random_thoughtss <random_thoughtss@proton.me>
Date: Wed, 19 Oct 2022 13:47:45 -0700
Subject: Added support for RunwayML inpainting model

---
 modules/processing.py | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index bcb0c32c..a6c308f9 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -546,7 +546,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         if not self.enable_hr:
             x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
-            samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
+            
+            # The "masked-image" in this case will just be all zeros since the entire image is masked.
+            image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device)
+            image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) 
+
+            # Add the fake full 1s mask to the first dimension.
+            image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+            image_conditioning = image_conditioning.to(x.dtype)
+
+            samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=image_conditioning)
             return samples
 
         x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
@@ -714,10 +723,31 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
+        if self.image_mask is not None:
+            conditioning_mask = np.array(self.image_mask.convert("L"))
+            conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
+            conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
+
+            # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
+            conditioning_mask = torch.round(conditioning_mask)
+        else:
+            conditioning_mask = torch.ones(1, 1, *image.shape[-2:])
+
+        # Create another latent image, this time with a masked version of the original input.
+        conditioning_mask = conditioning_mask.to(image.device)
+        conditioning_image = image * (1.0 - conditioning_mask)
+        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
+
+        # Create the concatenated conditioning tensor to be fed to `c_concat`
+        conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:])
+        conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1)
+        self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1)
+        self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype)
+
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
         x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
 
-        samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning)
+        samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
         if self.mask is not None:
             samples = samples * self.nmask + self.init_latent * self.mask
-- 
cgit v1.2.3


From c418467c03db916c3e5312e6ac4a67365e196dbd Mon Sep 17 00:00:00 2001
From: random_thoughtss <random_thoughtss@proton.me>
Date: Wed, 19 Oct 2022 15:09:43 -0700
Subject: Don't compute latent mask if we're not using it. Also added support
 for fixed highres_fix generation.

---
 modules/processing.py  | 72 +++++++++++++++++++++++++++++++-------------------
 modules/sd_samplers.py |  4 +++
 2 files changed, 49 insertions(+), 27 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index a6c308f9..684e5833 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -541,12 +541,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f
 
 
-    def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
-        self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
-
-        if not self.enable_hr:
-            x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
-            
+    def create_dummy_mask(self, x):
+        if self.sampler.conditioning_key in {'hybrid', 'concat'}:
             # The "masked-image" in this case will just be all zeros since the entire image is masked.
             image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device)
             image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) 
@@ -555,11 +551,23 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
             image_conditioning = image_conditioning.to(x.dtype)
 
-            samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=image_conditioning)
+        else:
+            # Dummy zero conditioning if we're not using inpainting model.
+            # Still takes up a bit of memory, but no encoder call.
+            image_conditioning = torch.zeros(x.shape[0], 5, x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device)
+
+        return image_conditioning
+
+    def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
+        self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
+
+        if not self.enable_hr:
+            x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
+            samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x))
             return samples
 
         x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
-        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
+        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x))
 
         samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
 
@@ -596,7 +604,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         x = None
         devices.torch_gc()
 
-        samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps)
+        samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps, image_conditioning=self.create_dummy_mask(samples))
 
         return samples
 
@@ -723,26 +731,36 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
-        if self.image_mask is not None:
-            conditioning_mask = np.array(self.image_mask.convert("L"))
-            conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
-            conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
+        conditioning_key = self.sampler.conditioning_key
 
-            # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
-            conditioning_mask = torch.round(conditioning_mask)
+        if conditioning_key in {'hybrid', 'concat'}:
+            if self.image_mask is not None:
+                conditioning_mask = np.array(self.image_mask.convert("L"))
+                conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
+                conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
+
+                # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
+                conditioning_mask = torch.round(conditioning_mask)
+            else:
+                conditioning_mask = torch.ones(1, 1, *image.shape[-2:])
+
+            # Create another latent image, this time with a masked version of the original input.
+            conditioning_mask = conditioning_mask.to(image.device)
+            conditioning_image = image * (1.0 - conditioning_mask)
+            conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
+
+            # Create the concatenated conditioning tensor to be fed to `c_concat`
+            conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:])
+            conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1)
+            self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1)
+            self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype)
         else:
-            conditioning_mask = torch.ones(1, 1, *image.shape[-2:])
-
-        # Create another latent image, this time with a masked version of the original input.
-        conditioning_mask = conditioning_mask.to(image.device)
-        conditioning_image = image * (1.0 - conditioning_mask)
-        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
-
-        # Create the concatenated conditioning tensor to be fed to `c_concat`
-        conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:])
-        conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1)
-        self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1)
-        self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype)
+            self.image_conditioning = torch.zeros(
+                self.init_latent.shape[0], 5, self.init_latent.shape[-2], self.init_latent.shape[-1], 
+                dtype=self.init_latent.dtype, 
+                device=self.init_latent.device
+            )
+
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
         x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index d270e4df..c21be26e 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -117,6 +117,8 @@ class VanillaStableDiffusionSampler:
         self.config = None
         self.last_latent = None
 
+        self.conditioning_key = sd_model.model.conditioning_key
+
     def number_of_needed_noises(self, p):
         return 0
 
@@ -328,6 +330,8 @@ class KDiffusionSampler:
         self.config = None
         self.last_latent = None
 
+        self.conditioning_key = sd_model.model.conditioning_key
+
     def callback_state(self, d):
         step = d['i']
         latent = d["denoised"]
-- 
cgit v1.2.3


From aa7ff2a1972f3865883e10ba28c5414cdebe8e3b Mon Sep 17 00:00:00 2001
From: random_thoughtss <random_thoughtss@proton.me>
Date: Wed, 19 Oct 2022 21:46:13 -0700
Subject: Fixed non-square highres fix generation

---
 modules/processing.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 684e5833..3caac25e 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -541,10 +541,13 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f
 
 
-    def create_dummy_mask(self, x):
+    def create_dummy_mask(self, x, first_phase: bool = False):
         if self.sampler.conditioning_key in {'hybrid', 'concat'}:
+            height = self.firstphase_height if first_phase else self.height
+            width = self.firstphase_width if first_phase else self.width
+
             # The "masked-image" in this case will just be all zeros since the entire image is masked.
-            image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device)
+            image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
             image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) 
 
             # Add the fake full 1s mask to the first dimension.
@@ -567,7 +570,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples
 
         x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
-        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x))
+        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, first_phase=True))
 
         samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
 
-- 
cgit v1.2.3


From 92a17a7a4a13fceb3c3e25a2e854b2a7dd6eb5df Mon Sep 17 00:00:00 2001
From: random_thoughtss <random_thoughtss@proton.me>
Date: Thu, 20 Oct 2022 09:45:03 -0700
Subject: Made dummy latents smaller. Minor code cleanups

---
 modules/processing.py  | 7 ++++---
 modules/sd_samplers.py | 6 ++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 3caac25e..539cde38 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -557,7 +557,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         else:
             # Dummy zero conditioning if we're not using inpainting model.
             # Still takes up a bit of memory, but no encoder call.
-            image_conditioning = torch.zeros(x.shape[0], 5, x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device)
+            # Pretty sure we can just make this a 1x1 image since it's not going to be used besides its batch size.
+            image_conditioning = torch.zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
 
         return image_conditioning
 
@@ -759,8 +760,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype)
         else:
             self.image_conditioning = torch.zeros(
-                self.init_latent.shape[0], 5, self.init_latent.shape[-2], self.init_latent.shape[-1], 
-                dtype=self.init_latent.dtype, 
+                self.init_latent.shape[0], 5, 1, 1,
+                dtype=self.init_latent.dtype,
                 device=self.init_latent.device
             )
 
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index c21be26e..cc682593 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -138,7 +138,7 @@ class VanillaStableDiffusionSampler:
         if self.stop_at is not None and self.step > self.stop_at:
             raise InterruptedException
 
-        # Have to unwrap the inpainting conditioning here to perform pre-preocessing
+        # Have to unwrap the inpainting conditioning here to perform pre-processing
         image_conditioning = None
         if isinstance(cond, dict):
             image_conditioning = cond["c_concat"][0]
@@ -146,7 +146,7 @@ class VanillaStableDiffusionSampler:
             unconditional_conditioning = unconditional_conditioning["c_crossattn"][0]
 
         conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
-        unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step)            
+        unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step)
 
         assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers'
         cond = tensor
@@ -165,6 +165,8 @@ class VanillaStableDiffusionSampler:
             img_orig = self.sampler.model.q_sample(self.init_latent, ts)
             x_dec = img_orig * self.mask + self.nmask * x_dec
 
+        # Wrap the image conditioning back up since the DDIM code can accept the dict directly.
+        # Note that they need to be lists because it just concatenates them later.
         if image_conditioning is not None:
             cond = {"c_concat": [image_conditioning], "c_crossattn": [cond]}
             unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]}
-- 
cgit v1.2.3


From 45872181902ada06267e2de601586d512cf5df1a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Fri, 21 Oct 2022 09:00:39 +0300
Subject: updated readme and some small stylistic changes to code

---
 README.md                       |  1 +
 modules/processing.py           | 14 ++++++--------
 modules/sd_hijack_inpainting.py |  3 +++
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'modules/processing.py')

diff --git a/README.md b/README.md
index 859a91b6..a98bb00b 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
 - No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
 - DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
 - [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
+- Support for dedicated [inpainting model](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion) by RunwayML. 
 
 ## Installation and Running
 Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
diff --git a/modules/processing.py b/modules/processing.py
index 539cde38..21786968 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -540,11 +540,10 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f
             self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f
 
-
-    def create_dummy_mask(self, x, first_phase: bool = False):
+    def create_dummy_mask(self, x, width=None, height=None):
         if self.sampler.conditioning_key in {'hybrid', 'concat'}:
-            height = self.firstphase_height if first_phase else self.height
-            width = self.firstphase_width if first_phase else self.width
+            height = height or self.height
+            width = width or self.width
 
             # The "masked-image" in this case will just be all zeros since the entire image is masked.
             image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
@@ -571,7 +570,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples
 
         x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
-        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, first_phase=True))
+        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, self.firstphase_width, self.firstphase_height))
 
         samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
 
@@ -634,6 +633,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         self.inpainting_mask_invert = inpainting_mask_invert
         self.mask = None
         self.nmask = None
+        self.image_conditioning = None
 
     def init(self, all_prompts, all_seeds, all_subseeds):
         self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model)
@@ -735,9 +735,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
-        conditioning_key = self.sampler.conditioning_key
-
-        if conditioning_key in {'hybrid', 'concat'}:
+        if self.sampler.conditioning_key in {'hybrid', 'concat'}:
             if self.image_mask is not None:
                 conditioning_mask = np.array(self.image_mask.convert("L"))
                 conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 43938071..fd92a335 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -301,6 +301,7 @@ def get_unconditional_conditioning(self, batch_size, null_label=None):
     c = repeat(c, "1 ... -> b ...", b=batch_size).to(self.device)
     return c
 
+
 class LatentInpaintDiffusion(LatentDiffusion):
     def __init__(
         self,
@@ -314,9 +315,11 @@ class LatentInpaintDiffusion(LatentDiffusion):
         assert self.masked_image_key in concat_keys
         self.concat_keys = concat_keys
 
+
 def should_hijack_inpainting(checkpoint_info):
     return str(checkpoint_info.filename).endswith("inpainting.ckpt") and not checkpoint_info.config.endswith("inpainting.yaml")
 
+
 def do_inpainting_hijack():
     ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
     ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
-- 
cgit v1.2.3


From bf30673f5132c8f28357b31224c54331e788d3e7 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Fri, 21 Oct 2022 10:19:25 +0300
Subject: Fix Hypernet infotext string split bug for PR #3283

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 21786968..d1deffa9 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -304,7 +304,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
         "Size": f"{p.width}x{p.height}",
         "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
         "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
-        "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.filename.split('\\')[-1].split('.')[0]),
+        "Hypernet": (None if shared.loaded_hypernetwork is None else os.path.splitext(os.path.basename(shared.loaded_hypernetwork.filename))[0]),
         "Batch size": (None if p.batch_size < 2 else p.batch_size),
         "Batch pos": (None if p.batch_size < 2 else position_in_batch),
         "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
-- 
cgit v1.2.3


From df5706409386cc2e88718bd9101045587c39f8bb Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Fri, 21 Oct 2022 16:10:51 +0300
Subject: do not load aesthetic clip model until it's needed; add refresh button
 for aesthetic embeddings; add aesthetic params to images' infotext

---
 modules/aesthetic_clip.py                  | 40 +++++++++++++++++++----
 modules/generation_parameters_copypaste.py | 18 +++++++++--
 modules/img2img.py                         |  5 +--
 modules/processing.py                      |  4 +--
 modules/sd_models.py                       |  3 --
 modules/txt2img.py                         |  4 +--
 modules/ui.py                              | 52 ++++++++++++++++++++----------
 style.css                                  |  2 +-
 8 files changed, 89 insertions(+), 39 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py
index 34efa931..8c828541 100644
--- a/modules/aesthetic_clip.py
+++ b/modules/aesthetic_clip.py
@@ -40,6 +40,8 @@ def iter_to_batched(iterable, n=1):
 
 
 def create_ui():
+    import modules.ui
+
     with gr.Group():
         with gr.Accordion("Open for Clip Aesthetic!", open=False):
             with gr.Row():
@@ -55,6 +57,8 @@ def create_ui():
                                              label="Aesthetic imgs embedding",
                                              value="None")
 
+                modules.ui.create_refresh_button(aesthetic_imgs, shared.update_aesthetic_embeddings, lambda: {"choices": sorted(shared.aesthetic_embeddings.keys())}, "refresh_aesthetic_embeddings")
+
             with gr.Row():
                 aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs',
                                                  placeholder="This text is used to rotate the feature space of the imgs embs",
@@ -66,11 +70,21 @@ def create_ui():
     return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative
 
 
+aesthetic_clip_model = None
+
+
+def aesthetic_clip():
+    global aesthetic_clip_model
+
+    if aesthetic_clip_model is None or aesthetic_clip_model.name_or_path != shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path:
+        aesthetic_clip_model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path)
+        aesthetic_clip_model.cpu()
+
+    return aesthetic_clip_model
+
+
 def generate_imgs_embd(name, folder, batch_size):
-    # clipModel = CLIPModel.from_pretrained(
-    #     shared.sd_model.cond_stage_model.clipModel.name_or_path
-    # )
-    model = shared.clip_model.to(device)
+    model = aesthetic_clip().to(device)
     processor = CLIPProcessor.from_pretrained(model.name_or_path)
 
     with torch.no_grad():
@@ -91,7 +105,7 @@ def generate_imgs_embd(name, folder, batch_size):
         path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt")
         torch.save(embs, path)
 
-        model = model.cpu()
+        model.cpu()
         del processor
         del embs
         gc.collect()
@@ -132,7 +146,7 @@ class AestheticCLIP:
         self.image_embs = None
         self.load_image_embs(None)
 
-    def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None,
+    def set_aesthetic_params(self, p, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None,
                              aesthetic_slerp=True, aesthetic_imgs_text="",
                              aesthetic_slerp_angle=0.15,
                              aesthetic_text_negative=False):
@@ -145,6 +159,18 @@ class AestheticCLIP:
         self.aesthetic_steps = aesthetic_steps
         self.load_image_embs(image_embs_name)
 
+        if self.image_embs_name is not None:
+            p.extra_generation_params.update({
+                "Aesthetic LR": aesthetic_lr,
+                "Aesthetic weight": aesthetic_weight,
+                "Aesthetic steps": aesthetic_steps,
+                "Aesthetic embedding": self.image_embs_name,
+                "Aesthetic slerp": aesthetic_slerp,
+                "Aesthetic text": aesthetic_imgs_text,
+                "Aesthetic text negative": aesthetic_text_negative,
+                "Aesthetic slerp angle": aesthetic_slerp_angle,
+            })
+
     def set_skip(self, skip):
         self.skip = skip
 
@@ -168,7 +194,7 @@ class AestheticCLIP:
 
             tokens = torch.asarray(remade_batch_tokens).to(device)
 
-            model = copy.deepcopy(shared.clip_model).to(device)
+            model = copy.deepcopy(aesthetic_clip()).to(device)
             model.requires_grad_(True)
             if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0:
                 text_embs_2 = model.get_text_features(
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 0f041449..f73647da 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -4,13 +4,22 @@ import gradio as gr
 from modules.shared import script_path
 from modules import shared
 
-re_param_code = r"\s*([\w ]+):\s*([^,]+)(?:,|$)"
+re_param_code = r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)'
 re_param = re.compile(re_param_code)
 re_params = re.compile(r"^(?:" + re_param_code + "){3,}$")
 re_imagesize = re.compile(r"^(\d+)x(\d+)$")
 type_of_gr_update = type(gr.update())
 
 
+def quote(text):
+    if ',' not in str(text):
+        return text
+
+    text = str(text)
+    text = text.replace('\\', '\\\\')
+    text = text.replace('"', '\\"')
+    return f'"{text}"'
+
 def parse_generation_parameters(x: str):
     """parses generation parameters string, the one you see in text field under the picture in UI:
 ```
@@ -83,7 +92,12 @@ def connect_paste(button, paste_fields, input_comp, js=None):
             else:
                 try:
                     valtype = type(output.value)
-                    val = valtype(v)
+
+                    if valtype == bool and v == "False":
+                        val = False
+                    else:
+                        val = valtype(v)
+
                     res.append(gr.update(value=val))
                 except Exception:
                     res.append(gr.update())
diff --git a/modules/img2img.py b/modules/img2img.py
index bc7c66bc..eea5199b 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -109,10 +109,7 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
         inpainting_mask_invert=inpainting_mask_invert,
     )
 
-    shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps),
-                                               aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text,
-                                               aesthetic_slerp_angle,
-                                               aesthetic_text_negative)
+    shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative)
 
     if shared.cmd_opts.enable_console_prompts:
         print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
diff --git a/modules/processing.py b/modules/processing.py
index d1deffa9..f0852cd5 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -12,7 +12,7 @@ from skimage import exposure
 from typing import Any, Dict, List, Optional
 
 import modules.sd_hijack
-from modules import devices, prompt_parser, masking, sd_samplers, lowvram
+from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste
 from modules.sd_hijack import model_hijack
 from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
@@ -318,7 +318,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
 
     generation_params.update(p.extra_generation_params)
 
-    generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None])
+    generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
 
     negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else ""
 
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 05a1df28..b1c91b0d 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -234,9 +234,6 @@ def load_model(checkpoint_info=None):
 
     sd_hijack.model_hijack.hijack(sd_model)
 
-    if shared.clip_model is None or shared.clip_model.transformer.name_or_path != sd_model.cond_stage_model.wrapped.transformer.name_or_path:
-        shared.clip_model = CLIPModel.from_pretrained(sd_model.cond_stage_model.wrapped.transformer.name_or_path)
-
     sd_model.eval()
 
     print(f"Model loaded.")
diff --git a/modules/txt2img.py b/modules/txt2img.py
index 32ed1d8d..1761cfa2 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -36,9 +36,7 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
         firstphase_height=firstphase_height if enable_hr else None,
     )
 
-    shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps),
-                                           aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle,
-                                           aesthetic_text_negative)
+    shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative)
 
     if cmd_opts.enable_console_prompts:
         print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)
diff --git a/modules/ui.py b/modules/ui.py
index 381ca925..0d020de6 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -597,27 +597,29 @@ def apply_setting(key, value):
     return value
 
 
-def create_ui(wrap_gradio_gpu_call):
-    import modules.img2img
-    import modules.txt2img
+def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id):
+    def refresh():
+        refresh_method()
+        args = refreshed_args() if callable(refreshed_args) else refreshed_args
 
-    def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id):
-        def refresh():
-            refresh_method()
-            args = refreshed_args() if callable(refreshed_args) else refreshed_args
+        for k, v in args.items():
+            setattr(refresh_component, k, v)
 
-            for k, v in args.items():
-                setattr(refresh_component, k, v)
+        return gr.update(**(args or {}))
 
-            return gr.update(**(args or {}))
+    refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id)
+    refresh_button.click(
+        fn=refresh,
+        inputs=[],
+        outputs=[refresh_component]
+    )
+    return refresh_button
+
+
+def create_ui(wrap_gradio_gpu_call):
+    import modules.img2img
+    import modules.txt2img
 
-        refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id)
-        refresh_button.click(
-            fn = refresh,
-            inputs = [],
-            outputs = [refresh_component]
-        )
-        return refresh_button
 
     with gr.Blocks(analytics_enabled=False) as txt2img_interface:
         txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button = create_toprow(is_img2img=False)
@@ -802,6 +804,14 @@ def create_ui(wrap_gradio_gpu_call):
                 (hr_options, lambda d: gr.Row.update(visible="Denoising strength" in d)),
                 (firstphase_width, "First pass size-1"),
                 (firstphase_height, "First pass size-2"),
+                (aesthetic_lr, "Aesthetic LR"),
+                (aesthetic_weight, "Aesthetic weight"),
+                (aesthetic_steps, "Aesthetic steps"),
+                (aesthetic_imgs, "Aesthetic embedding"),
+                (aesthetic_slerp, "Aesthetic slerp"),
+                (aesthetic_imgs_text, "Aesthetic text"),
+                (aesthetic_text_negative, "Aesthetic text negative"),
+                (aesthetic_slerp_angle, "Aesthetic slerp angle"),
             ]
 
             txt2img_preview_params = [
@@ -1077,6 +1087,14 @@ def create_ui(wrap_gradio_gpu_call):
                 (seed_resize_from_w, "Seed resize from-1"),
                 (seed_resize_from_h, "Seed resize from-2"),
                 (denoising_strength, "Denoising strength"),
+                (aesthetic_lr_im, "Aesthetic LR"),
+                (aesthetic_weight_im, "Aesthetic weight"),
+                (aesthetic_steps_im, "Aesthetic steps"),
+                (aesthetic_imgs_im, "Aesthetic embedding"),
+                (aesthetic_slerp_im, "Aesthetic slerp"),
+                (aesthetic_imgs_text_im, "Aesthetic text"),
+                (aesthetic_text_negative_im, "Aesthetic text negative"),
+                (aesthetic_slerp_angle_im, "Aesthetic slerp angle"),
             ]
             token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter])
 
diff --git a/style.css b/style.css
index 26ae36a5..5d2bacc9 100644
--- a/style.css
+++ b/style.css
@@ -477,7 +477,7 @@ input[type="range"]{
     padding: 0;
 }
 
-#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization{
+#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization, #refresh_aesthetic_embeddings{
     max-width: 2.5em;
     min-width: 2.5em;
     height: 2.4em;
-- 
cgit v1.2.3


From fccad18a59e3c2c33fefbbb1763c6a87a3a68eba Mon Sep 17 00:00:00 2001
From: timntorres <timothynarcisotorres@gmail.com>
Date: Fri, 21 Oct 2022 02:17:26 -0700
Subject: Refer to Hypernet's name, sensibly, by its name variable.

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index f0852cd5..ff1ec4c9 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -304,7 +304,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
         "Size": f"{p.width}x{p.height}",
         "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
         "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
-        "Hypernet": (None if shared.loaded_hypernetwork is None else os.path.splitext(os.path.basename(shared.loaded_hypernetwork.filename))[0]),
+        "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name),
         "Batch size": (None if p.batch_size < 2 else p.batch_size),
         "Batch pos": (None if p.batch_size < 2 else position_in_batch),
         "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
-- 
cgit v1.2.3