From caf84e8233236cd36fcd42e39e4199262dc3f4e7 Mon Sep 17 00:00:00 2001
From: catboxanon <122327233+catboxanon@users.noreply.github.com>
Date: Wed, 22 Mar 2023 17:51:40 +0000
Subject: Expose inpainting mask and composite

For inpainting, this exposes the mask and masked composite and gives
the user the ability to display these in the web UI,
save to disk, or both.
---
 modules/processing.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 59717b4c..2e5a363f 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -689,6 +689,22 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     image.info["parameters"] = text
                 output_images.append(image)
 
+                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
+                    image_mask = p.mask_for_overlay.convert('RGB')
+                    image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), p.mask_for_overlay.convert('L')).convert('RGBA')
+
+                    if opts.save_mask:
+                        images.save_image(image_mask, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-mask")
+
+                    if opts.save_mask_composite:
+                        images.save_image(image_mask_composite, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-mask-composite")
+
+                    if opts.return_mask:
+                        output_images.append(image_mask)
+                    
+                    if opts.return_mask_composite:
+                        output_images.append(image_mask_composite)
+
             del x_samples_ddim
 
             devices.torch_gc()
-- 
cgit v1.2.3


From 68999d0b15d612965e7bc7feb62d6b4d55e112fa Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Sat, 25 Mar 2023 12:52:14 -0400
Subject: Add upscale slider to img2img

---
 modules/processing.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 2e5a363f..fc4b166c 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -929,7 +929,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     sampler = None
 
-    def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs):
+    def __init__(self, init_images: Optional[list] = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: Optional[float] = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: Optional[float] = None, scale: float = 0, **kwargs):
         super().__init__(**kwargs)
 
         self.init_images = init_images
@@ -949,11 +949,27 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         self.mask = None
         self.nmask = None
         self.image_conditioning = None
+        self.scale = scale
+
+    def get_final_size(self):
+        if self.scale > 1:
+            img = self.init_images[0]
+            width = int(img.width * self.scale)
+            height = int(img.height * self.scale)
+            return width, height
+        else:
+            return self.width, self.height
+
 
     def init(self, all_prompts, all_seeds, all_subseeds):
         self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
         crop_region = None
 
+        if self.scale > 1:
+            self.extra_generation_params["Img2Img Upscale"] = self.scale
+
+        self.width, self.height = self.get_final_size()
+
         image_mask = self.image_mask
 
         if image_mask is not None:
-- 
cgit v1.2.3


From 7ea5d395c44be208f654b07ec7993aa2952f2510 Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Sun, 19 Feb 2023 03:45:43 -0800
Subject: Add upscaler to img2img

---
 modules/processing.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index fc4b166c..afb8cfd1 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -929,7 +929,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     sampler = None
 
-    def __init__(self, init_images: Optional[list] = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: Optional[float] = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: Optional[float] = None, scale: float = 0, **kwargs):
+    def __init__(self, init_images: Optional[list] = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: Optional[float] = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: Optional[float] = None, scale: float = 0, upscaler: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
 
         self.init_images = init_images
@@ -950,6 +950,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         self.nmask = None
         self.image_conditioning = None
         self.scale = scale
+        self.upscaler = upscaler
 
     def get_final_size(self):
         if self.scale > 1:
@@ -966,7 +967,16 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         crop_region = None
 
         if self.scale > 1:
-            self.extra_generation_params["Img2Img Upscale"] = self.scale
+            self.extra_generation_params["Img2Img upscale"] = self.scale
+
+        # Non-latent upscalers are run before sampling
+        # Latent upscalers are run during sampling
+        init_upscaler = None
+        if self.upscaler is not None:
+            self.extra_generation_params["Img2Img upscaler"] = self.upscaler
+            if self.upscaler not in shared.latent_upscale_modes:
+                assert len([x for x in shared.sd_upscalers if x.name == self.upscaler]) > 0, f"could not find upscaler named {self.upscaler}"
+                init_upscaler = self.upscaler
 
         self.width, self.height = self.get_final_size()
 
@@ -992,7 +1002,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = images.resize_image(2, mask, self.width, self.height)
                 self.paste_to = (x1, y1, x2-x1, y2-y1)
             else:
-                image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
+                image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height, init_upscaler)
                 np_mask = np.array(image_mask)
                 np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
                 self.mask_for_overlay = Image.fromarray(np_mask)
@@ -1009,7 +1019,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             image = images.flatten(img, opts.img2img_background_color)
 
             if crop_region is None and self.resize_mode != 3:
-                image = images.resize_image(self.resize_mode, image, self.width, self.height)
+                image = images.resize_image(self.resize_mode, image, self.width, self.height, init_upscaler)
 
             if image_mask is not None:
                 image_masked = Image.new('RGBa', (image.width, image.height))
@@ -1054,8 +1064,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
 
-        if self.resize_mode == 3:
-            self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
+        latent_scale_mode = shared.latent_upscale_modes.get(self.upscaler, None) if self.upscaler is not None else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "nearest")
+        if latent_scale_mode is not None:
+            self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode=latent_scale_mode["mode"], antialias=latent_scale_mode["antialias"])
 
         if image_mask is not None:
             init_mask = latent_mask
-- 
cgit v1.2.3


From 8a34671fe91e142bce9e5556cca2258b3be9dd6e Mon Sep 17 00:00:00 2001
From: MrCheeze <fishycheeze@yahoo.ca>
Date: Fri, 24 Mar 2023 22:48:16 -0400
Subject: Add support for the Variations models (unclip-h and unclip-l)

---
 modules/processing.py | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 59717b4c..1451811c 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -78,21 +78,27 @@ def apply_overlay(image, paste_loc, index, overlays):
 
 
 def txt2img_image_conditioning(sd_model, x, width, height):
-    if sd_model.model.conditioning_key not in {'hybrid', 'concat'}:
-        # Dummy zero conditioning if we're not using inpainting model.
-        # Still takes up a bit of memory, but no encoder call.
-        # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
-        return x.new_zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
+    if sd_model.model.conditioning_key in {'hybrid', 'concat'}: # Inpainting models
 
-    # The "masked-image" in this case will just be all zeros since the entire image is masked.
-    image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
-    image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
+        # The "masked-image" in this case will just be all zeros since the entire image is masked.
+        image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
+        image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
 
-    # Add the fake full 1s mask to the first dimension.
-    image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
-    image_conditioning = image_conditioning.to(x.dtype)
+        # Add the fake full 1s mask to the first dimension.
+        image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+        image_conditioning = image_conditioning.to(x.dtype)
 
-    return image_conditioning
+        return image_conditioning
+
+    elif sd_model.model.conditioning_key == "crossattn-adm": # UnCLIP models
+
+        return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device)
+
+    else:
+        # Dummy zero conditioning if we're not using inpainting or unclip models.
+        # Still takes up a bit of memory, but no encoder call.
+        # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
+        return x.new_zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
 
 
 class StableDiffusionProcessing:
@@ -190,6 +196,14 @@ class StableDiffusionProcessing:
 
         return conditioning_image
 
+    def unclip_image_conditioning(self, source_image):
+        c_adm = self.sd_model.embedder(source_image)
+        if self.sd_model.noise_augmentor is not None:
+            noise_level = 0 # TODO: Allow other noise levels?
+            c_adm, noise_level_emb = self.sd_model.noise_augmentor(c_adm, noise_level=repeat(torch.tensor([noise_level]).to(c_adm.device), '1 -> b', b=c_adm.shape[0]))
+            c_adm = torch.cat((c_adm, noise_level_emb), 1)
+        return c_adm
+
     def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None):
         self.is_using_inpainting_conditioning = True
 
@@ -241,6 +255,9 @@ class StableDiffusionProcessing:
         if self.sampler.conditioning_key in {'hybrid', 'concat'}:
             return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
 
+        if self.sampler.conditioning_key == "crossattn-adm":
+            return self.unclip_image_conditioning(source_image)
+
         # Dummy zero conditioning if we're not using inpainting or depth model.
         return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
 
-- 
cgit v1.2.3


From 433b3ab7017556a19173a86d1215ed0a0b5b1396 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 28 Mar 2023 20:36:57 +0300
Subject: Revert "Merge pull request #7931 from space-nuko/img2img-enhance"

This reverts commit 426875937048e21305ac24bea53df06523bdaa81, reversing
changes made to 1b63afbedc7789c0eb9a4742b780ab304d7a9caf.
---
 modules/processing.py | 37 +++++--------------------------------
 1 file changed, 5 insertions(+), 32 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 509b80b9..6d9c6a8d 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -946,7 +946,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     sampler = None
 
-    def __init__(self, init_images: Optional[list] = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: Optional[float] = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: Optional[float] = None, scale: float = 0, upscaler: Optional[str] = None, **kwargs):
+    def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs):
         super().__init__(**kwargs)
 
         self.init_images = init_images
@@ -966,37 +966,11 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         self.mask = None
         self.nmask = None
         self.image_conditioning = None
-        self.scale = scale
-        self.upscaler = upscaler
-
-    def get_final_size(self):
-        if self.scale > 1:
-            img = self.init_images[0]
-            width = int(img.width * self.scale)
-            height = int(img.height * self.scale)
-            return width, height
-        else:
-            return self.width, self.height
-
 
     def init(self, all_prompts, all_seeds, all_subseeds):
         self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
         crop_region = None
 
-        if self.scale > 1:
-            self.extra_generation_params["Img2Img upscale"] = self.scale
-
-        # Non-latent upscalers are run before sampling
-        # Latent upscalers are run during sampling
-        init_upscaler = None
-        if self.upscaler is not None:
-            self.extra_generation_params["Img2Img upscaler"] = self.upscaler
-            if self.upscaler not in shared.latent_upscale_modes:
-                assert len([x for x in shared.sd_upscalers if x.name == self.upscaler]) > 0, f"could not find upscaler named {self.upscaler}"
-                init_upscaler = self.upscaler
-
-        self.width, self.height = self.get_final_size()
-
         image_mask = self.image_mask
 
         if image_mask is not None:
@@ -1019,7 +993,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = images.resize_image(2, mask, self.width, self.height)
                 self.paste_to = (x1, y1, x2-x1, y2-y1)
             else:
-                image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height, init_upscaler)
+                image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
                 np_mask = np.array(image_mask)
                 np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
                 self.mask_for_overlay = Image.fromarray(np_mask)
@@ -1036,7 +1010,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             image = images.flatten(img, opts.img2img_background_color)
 
             if crop_region is None and self.resize_mode != 3:
-                image = images.resize_image(self.resize_mode, image, self.width, self.height, init_upscaler)
+                image = images.resize_image(self.resize_mode, image, self.width, self.height)
 
             if image_mask is not None:
                 image_masked = Image.new('RGBa', (image.width, image.height))
@@ -1081,9 +1055,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
 
-        latent_scale_mode = shared.latent_upscale_modes.get(self.upscaler, None) if self.upscaler is not None else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "nearest")
-        if latent_scale_mode is not None:
-            self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode=latent_scale_mode["mode"], antialias=latent_scale_mode["antialias"])
+        if self.resize_mode == 3:
+            self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
 
         if image_mask is not None:
             init_mask = latent_mask
-- 
cgit v1.2.3