From e715e46b6aa7f2e5e147cfa1fa2f49b1d926a074 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 16:10:22 -0700
Subject: Implements "scheduling" for blending of the original latents and adds a
 latent blending formula that preserves details in blend transition areas.

---
 modules/sd_samplers_cfg_denoiser.py | 61 +++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index b8101d38..c4d6fda6 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -43,6 +43,9 @@ class CFGDenoiser(torch.nn.Module):
         self.model_wrap = None
         self.mask = None
         self.nmask = None
+        self.mask_blend_power = 1
+        self.mask_blend_scale = 1
+        self.mask_blend_offset = 0
         self.init_latent = None
         self.steps = None
         """number of steps as specified by user in UI"""
@@ -56,6 +59,9 @@ class CFGDenoiser(torch.nn.Module):
         self.sampler = sampler
         self.model_wrap = None
         self.p = None
+
+        # NOTE: masking before denoising can cause the original latents to be oversmoothed
+        # as the original latents do not have noise
         self.mask_before_denoising = False
 
     @property
@@ -89,6 +95,55 @@ class CFGDenoiser(torch.nn.Module):
         self.sampler.sampler_extra_args['uncond'] = uc
 
     def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
+        def latent_blend(a, b, t):
+            """
+            Interpolates two latent image representations according to the parameter t,
+            where the interpolated vectors' magnitudes are also interpolated separately.
+            The "detail_preservation" factor biases the magnitude interpolation towards
+            the larger of the two magnitudes.
+            """
+            # Record the original latent vector magnitudes.
+            # We bring them to a power so that larger magnitudes are favored over smaller ones.
+            # 64-bit operations are used here to allow large exponents.
+            detail_preservation = 32
+            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** detail_preservation
+            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** detail_preservation
+
+            one_minus_t = 1 - t
+
+            # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
+            interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / detail_preservation)
+
+            # Linearly interpolate the image vectors.
+            image_interp = a * one_minus_t + b * t
+
+            # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.)
+            # 64-bit operations are used here to allow large exponents.
+            image_interp_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64) + 0.0001
+
+            # Change the linearly interpolated image vectors' magnitudes to the value we want.
+            # This is the last 64-bit operation.
+            image_interp *= (interp_magnitude / image_interp_magnitude).to(image_interp.dtype)
+
+            return image_interp
+
+        def get_modified_nmask(nmask, _sigma):
+            """
+            Converts a negative mask representing the transparency of the original latent vectors being overlayed
+            to a mask that is scaled according to the denoising strength for this step.
+
+            Where:
+                0 = fully opaque, infinite density, fully masked
+                1 = fully transparent, zero density, fully unmasked
+
+            We bring this transparency to a power, as this allows one to simulate N number of blending operations
+            where N can be any positive real value. Using this one can control the balance of influence between
+            the denoiser and the original latents according to the sigma value.
+
+            NOTE: "mask" is not used
+            """
+            return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale + self.mask_blend_offset)
+
         if state.interrupted or state.skipped:
             raise sd_samplers_common.InterruptedException
 
@@ -105,8 +160,9 @@ class CFGDenoiser(torch.nn.Module):
 
         assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
 
+        # Blend in the original latents (before)
         if self.mask_before_denoising and self.mask is not None:
-            x = self.init_latent * self.mask + self.nmask * x
+            x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma))
 
         batch_size = len(conds_list)
         repeats = [len(conds_list[i]) for i in range(batch_size)]
@@ -207,8 +263,9 @@ class CFGDenoiser(torch.nn.Module):
         else:
             denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
 
+        # Blend in the original latents (after)
         if not self.mask_before_denoising and self.mask is not None:
-            denoised = self.init_latent * self.mask + self.nmask * denoised
+            denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma))
 
         self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
 
-- 
cgit v1.2.3


From c5c7fa06aae1ae9f8b6d29ae2da3874921d4729b Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 22:35:07 -0700
Subject: Added slider for detail preservation strength, removed largely
 needless offset parameter, changed labels in UI and for saving to/pasting
 data from PNG files.

---
 modules/img2img.py                  | 10 +++++-----
 modules/processing.py               |  2 +-
 modules/sd_samplers_cfg_denoiser.py | 11 +++++------
 modules/sd_samplers_common.py       |  2 +-
 modules/ui.py                       | 14 +++++++-------
 scripts/outpainting_mk_2.py         | 12 ++++++------
 scripts/poor_mans_outpainting.py    | 12 ++++++------
 test/test_img2img.py                |  2 +-
 8 files changed, 32 insertions(+), 33 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/img2img.py b/modules/img2img.py
index 240d0588..023808d6 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -134,7 +134,7 @@ def img2img(id_task: str,
             mask_alpha: float,
             mask_blend_power: float,
             mask_blend_scale: float,
-            mask_blend_offset: float,
+            inpaint_detail_preservation: float,
             inpainting_fill: int,
             n_iter: int,
             batch_size: int,
@@ -216,7 +216,7 @@ def img2img(id_task: str,
         mask_blur=mask_blur,
         mask_blend_power=mask_blend_power,
         mask_blend_scale=mask_blend_scale,
-        mask_blend_offset=mask_blend_offset,
+        inpaint_detail_preservation=inpaint_detail_preservation,
         inpainting_fill=inpainting_fill,
         resize_mode=resize_mode,
         denoising_strength=denoising_strength,
@@ -237,9 +237,9 @@ def img2img(id_task: str,
 
     if mask:
         p.extra_generation_params["Mask blur"] = mask_blur
-        p.extra_generation_params["Mask blend power"] = mask_blend_power
-        p.extra_generation_params["Mask blend scale"] = mask_blend_scale
-        p.extra_generation_params["Mask blend offset"] = mask_blend_offset
+        p.extra_generation_params["Mask blending bias"] = mask_blend_power
+        p.extra_generation_params["Mask blending preservation"] = mask_blend_scale
+        p.extra_generation_params["Mask blending detail boost"] = inpaint_detail_preservation
 
     with closing(p):
         if is_batch:
diff --git a/modules/processing.py b/modules/processing.py
index da4d6fda..361e8b05 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1351,7 +1351,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur: int = None
     mask_blend_power: float = 1
     mask_blend_scale: float = 1
-    mask_blend_offset: float = 0
+    inpaint_detail_preservation: float = 16
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index c4d6fda6..598cd487 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -45,7 +45,7 @@ class CFGDenoiser(torch.nn.Module):
         self.nmask = None
         self.mask_blend_power = 1
         self.mask_blend_scale = 1
-        self.mask_blend_offset = 0
+        self.inpaint_detail_preservation = 16
         self.init_latent = None
         self.steps = None
         """number of steps as specified by user in UI"""
@@ -105,14 +105,13 @@ class CFGDenoiser(torch.nn.Module):
             # Record the original latent vector magnitudes.
             # We bring them to a power so that larger magnitudes are favored over smaller ones.
             # 64-bit operations are used here to allow large exponents.
-            detail_preservation = 32
-            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** detail_preservation
-            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** detail_preservation
+            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation
+            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation
 
             one_minus_t = 1 - t
 
             # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
-            interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / detail_preservation)
+            interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / self.inpaint_detail_preservation)
 
             # Linearly interpolate the image vectors.
             image_interp = a * one_minus_t + b * t
@@ -142,7 +141,7 @@ class CFGDenoiser(torch.nn.Module):
 
             NOTE: "mask" is not used
             """
-            return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale + self.mask_blend_offset)
+            return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale)
 
         if state.interrupted or state.skipped:
             raise sd_samplers_common.InterruptedException
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 8904da2f..ecd8ab0a 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -279,7 +279,7 @@ class Sampler:
         self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
         self.model_wrap_cfg.mask_blend_power = p.mask_blend_power if hasattr(p, 'mask_blend_power') else None
         self.model_wrap_cfg.mask_blend_scale = p.mask_blend_scale if hasattr(p, 'mask_blend_scale') else None
-        self.model_wrap_cfg.mask_blend_offset = p.mask_blend_offset if hasattr(p, 'mask_blend_offset') else None
+        self.model_wrap_cfg.inpaint_detail_preservation = p.inpaint_detail_preservation if hasattr(p, 'inpaint_detail_preservation') else None
         self.model_wrap_cfg.step = 0
         self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
         self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0)
diff --git a/modules/ui.py b/modules/ui.py
index 86c13086..f5e20147 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -732,9 +732,9 @@ def create_ui():
                             with FormRow():
                                 mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur")
                                 mask_alpha = gr.Slider(label="Mask transparency", visible=False, elem_id="img2img_mask_alpha")
-                                mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power")
-                                mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_scale")
-                                mask_blend_offset = gr.Slider(label='Mask blend offset', minimum=-4, maximum=4, step=0.1, value=0, elem_id="img2img_mask_blend_offset")
+                                mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power")
+                                mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=1, elem_id="img2img_mask_blend_scale")
+                                inpaint_detail_preservation = gr.Slider(label='Blending detail boost', minimum=1, maximum=32, step=0.5, value=16, elem_id="img2img_mask_blend_offset")
 
                             with FormRow():
                                 inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
@@ -786,7 +786,7 @@ def create_ui():
                     mask_alpha,
                     mask_blend_power,
                     mask_blend_scale,
-                    mask_blend_offset,
+                    inpaint_detail_preservation,
                     inpainting_fill,
                     batch_count,
                     batch_size,
@@ -885,9 +885,9 @@ def create_ui():
                 (toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update()),
                 (denoising_strength, "Denoising strength"),
                 (mask_blur, "Mask blur"),
-                (mask_blend_power, "Mask blend power"),
-                (mask_blend_scale, "Mask blend scale"),
-                (mask_blend_offset, "Mask blend offset"),
+                (mask_blend_power, "Mask blending bias"),
+                (mask_blend_scale, "Mask blending preservation"),
+                (inpaint_detail_preservation, "Mask blending detail boost"),
                 *scripts.scripts_img2img.infotext_fields
             ]
             parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings)
diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py
index 6aa97edf..54d95825 100644
--- a/scripts/outpainting_mk_2.py
+++ b/scripts/outpainting_mk_2.py
@@ -133,16 +133,16 @@ class Script(scripts.Script):
 
         pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels"))
         mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8, elem_id=self.elem_id("mask_blur"))
-        mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power"))
-        mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale"))
-        mask_blend_offset = gr.Slider(label='Mask blend scale', minimum=-4, maximum=4, step=0.1, value=1, elem_id=self.elem_id("mask_blend_offset"))
+        mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power"))
+        mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale"))
+        inpaint_detail_preservation = gr.Slider(label='Blending detail boost', minimum=1, maximum=32, step=0.5, value=16, elem_id=self.elem_id("inpaint_detail_preservation"))
         direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction"))
         noise_q = gr.Slider(label="Fall-off exponent (lower=higher detail)", minimum=0.0, maximum=4.0, step=0.01, value=1.0, elem_id=self.elem_id("noise_q"))
         color_variation = gr.Slider(label="Color variation", minimum=0.0, maximum=1.0, step=0.01, value=0.05, elem_id=self.elem_id("color_variation"))
 
-        return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, direction, noise_q, color_variation]
+        return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation]
 
-    def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, direction, noise_q, color_variation):
+    def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation):
         initial_seed_and_info = [None, None]
 
         process_width = p.width
@@ -172,7 +172,7 @@ class Script(scripts.Script):
         p.mask_blur_y = mask_blur_y*4
         p.mask_blend_power = mask_blend_power
         p.mask_blend_scale = mask_blend_scale
-        p.mask_blend_offset = mask_blend_offset
+        p.inpaint_detail_preservation = inpaint_detail_preservation
 
         init_img = p.init_images[0]
         target_w = math.ceil((init_img.width + left + right) / 64) * 64
diff --git a/scripts/poor_mans_outpainting.py b/scripts/poor_mans_outpainting.py
index b10140f1..e3acb3d4 100644
--- a/scripts/poor_mans_outpainting.py
+++ b/scripts/poor_mans_outpainting.py
@@ -22,22 +22,22 @@ class Script(scripts.Script):
 
         pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels"))
         mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id=self.elem_id("mask_blur"))
-        mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power"))
-        mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale"))
-        mask_blend_offset = gr.Slider(label='Mask blend offset', minimum=-4, maximum=4, step=0.1, value=0, elem_id=self.elem_id("mask_blend_offset"))
+        mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power"))
+        mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale"))
+        inpaint_detail_preservation = gr.Slider(label='Blending detail boost', minimum=1, maximum=32, step=0.5, value=16, elem_id=self.elem_id("inpaint_detail_preservation"))
         inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='fill', type="index", elem_id=self.elem_id("inpainting_fill"))
         direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction"))
 
-        return [pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, inpainting_fill, direction]
+        return [pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction]
 
-    def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, inpainting_fill, direction):
+    def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction):
         initial_seed = None
         initial_info = None
 
         p.mask_blur = mask_blur * 2
         p.mask_blend_power = mask_blend_power
         p.mask_blend_scale = mask_blend_scale
-        p.mask_blend_offset = mask_blend_offset
+        p.inpaint_detail_preservation = inpaint_detail_preservation
 
         p.inpainting_fill = inpainting_fill
         p.inpaint_full_res = False
diff --git a/test/test_img2img.py b/test/test_img2img.py
index 6289e59e..88b06eb8 100644
--- a/test/test_img2img.py
+++ b/test/test_img2img.py
@@ -26,7 +26,7 @@ def simple_img2img_request(img2img_basic_image_base64):
         "mask_blur": 4,
         "mask_blend_power": 1,
         "mask_blend_scale": 1,
-        "mask_blend_offset": 0,
+        "inpaint_detail_preservation": 16,
         "n_iter": 1,
         "negative_prompt": "",
         "override_settings": {},
-- 
cgit v1.2.3


From c7a1ff87207544dd4bcf3aefffa67a4a38678c16 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 23:31:10 -0700
Subject: Tweaked default values.

---
 modules/processing.py               | 4 ++--
 modules/sd_samplers_cfg_denoiser.py | 4 ++--
 test/test_img2img.py                | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/processing.py b/modules/processing.py
index 361e8b05..92fdebad 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1350,8 +1350,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_y: int = 4
     mask_blur: int = None
     mask_blend_power: float = 1
-    mask_blend_scale: float = 1
-    inpaint_detail_preservation: float = 16
+    mask_blend_scale: float = 0.5
+    inpaint_detail_preservation: float = 4
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 598cd487..ceb612d7 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -44,8 +44,8 @@ class CFGDenoiser(torch.nn.Module):
         self.mask = None
         self.nmask = None
         self.mask_blend_power = 1
-        self.mask_blend_scale = 1
-        self.inpaint_detail_preservation = 16
+        self.mask_blend_scale = 0.5
+        self.inpaint_detail_preservation = 4
         self.init_latent = None
         self.steps = None
         """number of steps as specified by user in UI"""
diff --git a/test/test_img2img.py b/test/test_img2img.py
index 88b06eb8..5cda2dba 100644
--- a/test/test_img2img.py
+++ b/test/test_img2img.py
@@ -25,8 +25,8 @@ def simple_img2img_request(img2img_basic_image_base64):
         "mask": None,
         "mask_blur": 4,
         "mask_blend_power": 1,
-        "mask_blend_scale": 1,
-        "inpaint_detail_preservation": 16,
+        "mask_blend_scale": 0.5,
+        "inpaint_detail_preservation": 4,
         "n_iter": 1,
         "negative_prompt": "",
         "override_settings": {},
-- 
cgit v1.2.3


From bb04d400c95df01d191ef6c1a43e66b95425fa33 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Sat, 2 Dec 2023 21:08:26 -0700
Subject: Rewrote latent_blend() to use in-place operations and to aggressively
 "del" references with the intention of minimizing allocations and easing
 garbage collection.

---
 modules/sd_samplers_cfg_denoiser.py | 41 +++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 13 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index ceb612d7..efbe7a40 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -102,29 +102,44 @@ class CFGDenoiser(torch.nn.Module):
             The "detail_preservation" factor biases the magnitude interpolation towards
             the larger of the two magnitudes.
             """
-            # Record the original latent vector magnitudes.
-            # We bring them to a power so that larger magnitudes are favored over smaller ones.
-            # 64-bit operations are used here to allow large exponents.
-            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation
-            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation
+            # NOTE: We use inplace operations wherever possible.
 
             one_minus_t = 1 - t
 
-            # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
-            interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / self.inpaint_detail_preservation)
-
             # Linearly interpolate the image vectors.
-            image_interp = a * one_minus_t + b * t
+            a_scaled = a * one_minus_t
+            b_scaled = b * t
+            image_interp = a_scaled
+            image_interp.add_(b_scaled)
+            result_type = image_interp.dtype
+            del a_scaled, b_scaled
 
             # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.)
             # 64-bit operations are used here to allow large exponents.
-            image_interp_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64) + 0.0001
+            current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001)
+
+            # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
+            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * one_minus_t
+            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * t
+            desired_magnitude = a_magnitude
+            desired_magnitude.add_(b_magnitude).pow_(1 / self.inpaint_detail_preservation)
+            del a_magnitude, b_magnitude, one_minus_t
 
             # Change the linearly interpolated image vectors' magnitudes to the value we want.
             # This is the last 64-bit operation.
-            image_interp *= (interp_magnitude / image_interp_magnitude).to(image_interp.dtype)
-
-            return image_interp
+            image_interp_scaling_factor = desired_magnitude
+            image_interp_scaling_factor.div_(current_magnitude)
+            image_interp_scaled = image_interp
+            image_interp_scaled.mul_(image_interp_scaling_factor)
+            del current_magnitude
+            del desired_magnitude
+            del image_interp
+            del image_interp_scaling_factor
+
+            image_interp_scaled = image_interp_scaled.to(result_type)
+            del result_type
+
+            return image_interp_scaled
 
         def get_modified_nmask(nmask, _sigma):
             """
-- 
cgit v1.2.3


From aaacf4823241450d88315af9d465d6815119fe0d Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 01:27:22 -0700
Subject: Organized the settings and UI of soft inpainting to allow for
 toggling the feature, and centralized default values to reduce the amount of
 copy-pasta.

---
 modules/img2img.py                  |  14 ++--
 modules/processing.py               |   5 +-
 modules/sd_samplers_cfg_denoiser.py |  35 +++++++---
 modules/sd_samplers_common.py       |   4 +-
 modules/soft_inpainting.py          | 133 ++++++++++++++++++++++++++++++++++++
 modules/ui.py                       |  17 +++--
 scripts/outpainting_mk_2.py         |  15 ++--
 scripts/poor_mans_outpainting.py    |  15 ++--
 test/test_img2img.py                |   8 ++-
 9 files changed, 197 insertions(+), 49 deletions(-)
 create mode 100644 modules/soft_inpainting.py

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/img2img.py b/modules/img2img.py
index 596f741c..3aa8a9ce 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -15,6 +15,7 @@ import modules.shared as shared
 import modules.processing as processing
 from modules.ui import plaintext_to_html
 import modules.scripts
+import modules.soft_inpainting as si
 
 
 def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=False, scale_by=1.0, use_png_info=False, png_info_props=None, png_info_dir=None):
@@ -162,6 +163,7 @@ def img2img(id_task: str,
             sampler_name: str,
             mask_blur: int,
             mask_alpha: float,
+            mask_blend_enabled: bool,
             mask_blend_power: float,
             mask_blend_scale: float,
             inpaint_detail_preservation: float,
@@ -227,6 +229,9 @@ def img2img(id_task: str,
 
     assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
 
+    soft_inpainting = si.SoftInpaintingSettings(mask_blend_power, mask_blend_scale, inpaint_detail_preservation) \
+        if mask_blend_enabled else None
+
     p = StableDiffusionProcessingImg2Img(
         sd_model=shared.sd_model,
         outpath_samples=opts.outdir_samples or opts.outdir_img2img_samples,
@@ -244,9 +249,7 @@ def img2img(id_task: str,
         init_images=[image],
         mask=mask,
         mask_blur=mask_blur,
-        mask_blend_power=mask_blend_power,
-        mask_blend_scale=mask_blend_scale,
-        inpaint_detail_preservation=inpaint_detail_preservation,
+        soft_inpainting=soft_inpainting,
         inpainting_fill=inpainting_fill,
         resize_mode=resize_mode,
         denoising_strength=denoising_strength,
@@ -267,9 +270,8 @@ def img2img(id_task: str,
 
     if mask:
         p.extra_generation_params["Mask blur"] = mask_blur
-        p.extra_generation_params["Mask blending bias"] = mask_blend_power
-        p.extra_generation_params["Mask blending preservation"] = mask_blend_scale
-        p.extra_generation_params["Mask blending contrast boost"] = inpaint_detail_preservation
+        if soft_inpainting is not None:
+            soft_inpainting.add_generation_params(p.extra_generation_params)
 
     with closing(p):
         if is_batch:
diff --git a/modules/processing.py b/modules/processing.py
index cd7216f8..b209c84a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -30,6 +30,7 @@ import modules.sd_models as sd_models
 import modules.sd_vae as sd_vae
 from ldm.data.util import AddMiDaS
 from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
+import modules.soft_inpainting as si
 
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
@@ -1425,9 +1426,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_x: int = 4
     mask_blur_y: int = 4
     mask_blur: int = None
-    mask_blend_power: float = 1
-    mask_blend_scale: float = 0.5
-    inpaint_detail_preservation: float = 4
+    soft_inpainting: si.SoftInpaintingParameters = si.default
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index efbe7a40..0ee0b7dd 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -6,6 +6,7 @@ import modules.shared as shared
 from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback
 from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback
 from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback
+import modules.soft_inpainting as si
 
 
 def catenate_conds(conds):
@@ -43,9 +44,7 @@ class CFGDenoiser(torch.nn.Module):
         self.model_wrap = None
         self.mask = None
         self.nmask = None
-        self.mask_blend_power = 1
-        self.mask_blend_scale = 0.5
-        self.inpaint_detail_preservation = 4
+        self.soft_inpainting: si.SoftInpaintingSettings = None  # None: forward() uses the plain mask/nmask blend
         self.init_latent = None
         self.steps = None
         """number of steps as specified by user in UI"""
@@ -95,7 +94,8 @@ class CFGDenoiser(torch.nn.Module):
         self.sampler.sampler_extra_args['uncond'] = uc
 
     def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
-        def latent_blend(a, b, t):
+        def latent_blend(a, b, t, one_minus_t=None):
+
             """
             Interpolates two latent image representations according to the parameter t,
             where the interpolated vectors' magnitudes are also interpolated separately.
@@ -104,7 +104,11 @@ class CFGDenoiser(torch.nn.Module):
             """
             # NOTE: We use inplace operations wherever possible.
 
-            one_minus_t = 1 - t
+            if one_minus_t is None:
+                one_minus_t = 1 - t
+
+            if self.soft_inpainting is None:
+                return a * one_minus_t + b * t
 
             # Linearly interpolate the image vectors.
             a_scaled = a * one_minus_t
@@ -119,10 +123,10 @@ class CFGDenoiser(torch.nn.Module):
             current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001)
 
             # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
-            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * one_minus_t
-            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * t
+            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * one_minus_t
+            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * t
             desired_magnitude = a_magnitude
-            desired_magnitude.add_(b_magnitude).pow_(1 / self.inpaint_detail_preservation)
+            desired_magnitude.add_(b_magnitude).pow_(1 / self.soft_inpainting.inpaint_detail_preservation)
             del a_magnitude, b_magnitude, one_minus_t
 
             # Change the linearly interpolated image vectors' magnitudes to the value we want.
@@ -156,7 +160,10 @@ class CFGDenoiser(torch.nn.Module):
 
             NOTE: "mask" is not used
             """
-            return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale)
+            if self.soft_inpainting is None:
+                return nmask
+
+            return torch.pow(nmask, (_sigma ** self.soft_inpainting.mask_blend_power) * self.soft_inpainting.mask_blend_scale)
 
         if state.interrupted or state.skipped:
             raise sd_samplers_common.InterruptedException
@@ -176,7 +183,10 @@ class CFGDenoiser(torch.nn.Module):
 
         # Blend in the original latents (before)
         if self.mask_before_denoising and self.mask is not None:
-            x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma))
+            if self.soft_inpainting is None:
+                x = latent_blend(self.init_latent, x, self.nmask, self.mask)
+            else:
+                x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma))
 
         batch_size = len(conds_list)
         repeats = [len(conds_list[i]) for i in range(batch_size)]
@@ -279,7 +289,10 @@ class CFGDenoiser(torch.nn.Module):
 
         # Blend in the original latents (after)
         if not self.mask_before_denoising and self.mask is not None:
-            denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma))
+            if self.soft_inpainting is None:
+                denoised = latent_blend(self.init_latent, denoised, self.nmask, self.mask)
+            else:
+                denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma))
 
         self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
 
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index ecd8ab0a..9682bee3 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -277,9 +277,7 @@ class Sampler:
         self.model_wrap_cfg.p = p
         self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None
         self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
-        self.model_wrap_cfg.mask_blend_power = p.mask_blend_power if hasattr(p, 'mask_blend_power') else None
-        self.model_wrap_cfg.mask_blend_scale = p.mask_blend_scale if hasattr(p, 'mask_blend_scale') else None
-        self.model_wrap_cfg.inpaint_detail_preservation = p.inpaint_detail_preservation if hasattr(p, 'inpaint_detail_preservation') else None
+        self.model_wrap_cfg.soft_inpainting = p.soft_inpainting if hasattr(p, 'soft_inpainting') else None
         self.model_wrap_cfg.step = 0
         self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
         self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0)
diff --git a/modules/soft_inpainting.py b/modules/soft_inpainting.py
new file mode 100644
index 00000000..259c36ec
--- /dev/null
+++ b/modules/soft_inpainting.py
@@ -0,0 +1,133 @@
+class SoftInpaintingSettings:
+    """One value per soft-inpainting parameter; reused for defaults, labels, element ids and sliders."""
+
+    def __init__(self, mask_blend_power, mask_blend_scale, inpaint_detail_preservation):
+        self.mask_blend_power = mask_blend_power
+        self.mask_blend_scale = mask_blend_scale
+        self.inpaint_detail_preservation = inpaint_detail_preservation
+
+    def get_paste_fields(self):
+        """Return (held value, generation-parameter label) pairs in constructor order."""
+        names = ("mask_blend_power", "mask_blend_scale", "inpaint_detail_preservation")
+        return [(getattr(self, name), getattr(gen_param_labels, name)) for name in names]
+
+    def add_generation_params(self, dest):
+        """Store the enabled flag and the three held values in *dest* under their labels."""
+        dest[enabled_gen_param_label] = True
+        for name in ("mask_blend_power", "mask_blend_scale", "inpaint_detail_preservation"):
+            dest[getattr(gen_param_labels, name)] = getattr(self, name)
+
+
+enabled_ui_label = "Soft inpainting"  # label of the enabling accordion built by gradio_ui()
+enabled_gen_param_label = "Soft inpainting enabled"  # generation-parameter key for the enabled flag
+enabled_el_id = "soft_inpainting_enabled"  # elem_id of the enabling accordion built by gradio_ui()
+
+default = SoftInpaintingSettings(1, 0.5, 4)  # initial slider values used by gradio_ui()
+ui_labels = SoftInpaintingSettings("Schedule bias", "Preservation strength", "Transition contrast boost")
+
+ui_info = SoftInpaintingSettings(
+    mask_blend_power="Shifts when preservation of original content occurs during denoising.",
+                     # Below 1: stronger preservation near the end (with low sigma).
+                     # 1: balanced (proportional to sigma).
+                     # Above 1: stronger preservation in the beginning (with high sigma).
+    mask_blend_scale="How strongly partially masked content should be preserved.",
+                     # Low values: favors generated content.
+                     # High values: favors original content.
+    inpaint_detail_preservation="Amplifies the contrast that may be lost in partially masked regions.")
+
+gen_param_labels = SoftInpaintingSettings("Soft inpainting schedule bias", "Soft inpainting preservation strength", "Soft inpainting transition contrast boost")
+el_ids = SoftInpaintingSettings("mask_blend_power", "mask_blend_scale", "inpaint_detail_preservation")  # slider elem_ids
+
+
+def gradio_ui():
+    """
+    Build the soft-inpainting controls: an enabling accordion holding three sliders and help text.
+
+    Slider labels, tooltips, initial values and elem_ids come from the module-level
+    ui_labels, ui_info, default and el_ids. Returns a 2-tuple:
+      [0] the components [enabled, mask_blend_power, mask_blend_scale, inpaint_detail_preservation];
+      [1] (component, generation-parameter label) pairs for the same components, in the same order.
+    gradio and InputAccordion are imported lazily, inside this function.
+    """
+    import gradio as gr
+    from modules.ui_components import InputAccordion
+    with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled:
+        with gr.Group():
+            gr.Markdown(
+                """
+                Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity.
+                **High _Mask blur_** values are recommended!
+                """)
+
+            result = SoftInpaintingSettings(  # here the three fields hold the slider components
+                gr.Slider(label=ui_labels.mask_blend_power,
+                          info=ui_info.mask_blend_power,
+                          minimum=0,
+                          maximum=8,
+                          step=0.1,
+                          value=default.mask_blend_power,
+                          elem_id=el_ids.mask_blend_power),
+                gr.Slider(label=ui_labels.mask_blend_scale,
+                          info=ui_info.mask_blend_scale,
+                          minimum=0,
+                          maximum=8,
+                          step=0.05,
+                          value=default.mask_blend_scale,
+                          elem_id=el_ids.mask_blend_scale),
+                gr.Slider(label=ui_labels.inpaint_detail_preservation,
+                          info=ui_info.inpaint_detail_preservation,
+                          minimum=1,
+                          maximum=32,
+                          step=0.5,
+                          value=default.inpaint_detail_preservation,
+                          elem_id=el_ids.inpaint_detail_preservation))
+
+            with gr.Accordion("Help", open=False):
+                gr.Markdown(
+                    f"""
+                    ### {ui_labels.mask_blend_power}
+                    
+                    The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas).
+                    This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step.
+                    This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation.
+                    
+                    - **Below 1**: Stronger preservation near the end (with low sigma)
+                    - **1**: Balanced (proportional to sigma)
+                    - **Above 1**: Stronger preservation in the beginning (with high sigma)
+                    """)
+                gr.Markdown(
+                    f"""
+                    ### {ui_labels.mask_blend_scale}
+                    
+                    Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content.
+                    This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength.
+                    
+                    - **Low values**: Favors generated content.
+                    - **High values**: Favors original content.
+                    """)
+                gr.Markdown(
+                    f"""
+                    ### {ui_labels.inpaint_detail_preservation}
+                    
+                    This parameter controls how the original latent vectors and denoised latent vectors are interpolated.
+                    With higher values, the magnitude of the resulting blended vector will be closer to the maximum of the two interpolated vectors.
+                    This can prevent the loss of contrast that occurs with linear interpolation.
+                    
+                    - **Low values**: Softer blending, details may fade.
+                    - **High values**: Stronger contrast, may over-saturate colors.
+                    """)
+
+    return (
+        [
+            soft_inpainting_enabled,
+            result.mask_blend_power,
+            result.mask_blend_scale,
+            result.inpaint_detail_preservation
+        ],
+        [
+            (soft_inpainting_enabled, enabled_gen_param_label),
+            (result.mask_blend_power, gen_param_labels.mask_blend_power),
+            (result.mask_blend_scale, gen_param_labels.mask_blend_scale),
+            (result.inpaint_detail_preservation, gen_param_labels.inpaint_detail_preservation)
+        ]
+    )
diff --git a/modules/ui.py b/modules/ui.py
index b13ed66c..0e4fb17a 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -29,6 +29,7 @@ import modules.shared as shared
 from modules import prompt_parser
 from modules.sd_hijack import model_hijack
 from modules.generation_parameters_copypaste import image_from_url_text
+import modules.soft_inpainting as si
 
 create_setting_component = ui_settings.create_setting_component
 
@@ -678,9 +679,16 @@ def create_ui():
                             with FormRow():
                                 mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur")
                                 mask_alpha = gr.Slider(label="Mask transparency", visible=False, elem_id="img2img_mask_alpha")
+
+                            with FormRow():
+                                soft_inpainting = si.gradio_ui()
+
+
+                            """
                                 mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power")
                                 mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id="img2img_mask_blend_scale")
                                 inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id="img2img_mask_blend_offset")
+                            """
 
                             with FormRow():
                                 inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
@@ -736,9 +744,7 @@ def create_ui():
                     sampler_name,
                     mask_blur,
                     mask_alpha,
-                    mask_blend_power,
-                    mask_blend_scale,
-                    inpaint_detail_preservation,
+                    *(soft_inpainting[0]),
                     inpainting_fill,
                     batch_count,
                     batch_size,
@@ -837,11 +843,10 @@ def create_ui():
                 (toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update()),
                 (denoising_strength, "Denoising strength"),
                 (mask_blur, "Mask blur"),
-                (mask_blend_power, "Mask blending bias"),
-                (mask_blend_scale, "Mask blending preservation"),
-                (inpaint_detail_preservation, "Mask blending contrast boost"),
+                *(soft_inpainting[1]),
                 *scripts.scripts_img2img.infotext_fields
             ]
+
             parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings)
             parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields, override_settings)
             parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(
diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py
index bd9cb61b..f7888688 100644
--- a/scripts/outpainting_mk_2.py
+++ b/scripts/outpainting_mk_2.py
@@ -10,6 +10,7 @@ from PIL import Image, ImageDraw
 from modules import images
 from modules.processing import Processed, process_images
 from modules.shared import opts, state
+import modules.soft_inpainting as si
 
 
 # this function is taken from https://github.com/parlance-zz/g-diffuser-bot
@@ -133,16 +134,14 @@ class Script(scripts.Script):
 
         pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels"))
         mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8, elem_id=self.elem_id("mask_blur"))
-        mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power"))
-        mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id=self.elem_id("mask_blend_scale"))
-        inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id=self.elem_id("inpaint_detail_preservation"))
+        soft_inpainting = si.gradio_ui()[0]
         direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction"))
         noise_q = gr.Slider(label="Fall-off exponent (lower=higher detail)", minimum=0.0, maximum=4.0, step=0.01, value=1.0, elem_id=self.elem_id("noise_q"))
         color_variation = gr.Slider(label="Color variation", minimum=0.0, maximum=1.0, step=0.01, value=0.05, elem_id=self.elem_id("color_variation"))
 
-        return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation]
+        return [info, pixels, mask_blur, *soft_inpainting, direction, noise_q, color_variation]
 
-    def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation):
+    def run(self, p, _, pixels, mask_blur, mask_blend_enabled, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation):
         initial_seed_and_info = [None, None]
 
         process_width = p.width
@@ -170,9 +169,9 @@ class Script(scripts.Script):
 
         p.mask_blur_x = mask_blur_x*4
         p.mask_blur_y = mask_blur_y*4
-        p.mask_blend_power = mask_blend_power
-        p.mask_blend_scale = mask_blend_scale
-        p.inpaint_detail_preservation = inpaint_detail_preservation
+
+        p.soft_inpainting = si.SoftInpaintingSettings(mask_blend_power, mask_blend_scale, inpaint_detail_preservation) \
+            if mask_blend_enabled else None
 
         init_img = p.init_images[0]
         target_w = math.ceil((init_img.width + left + right) / 64) * 64
diff --git a/scripts/poor_mans_outpainting.py b/scripts/poor_mans_outpainting.py
index 5388f5db..11f7f74a 100644
--- a/scripts/poor_mans_outpainting.py
+++ b/scripts/poor_mans_outpainting.py
@@ -7,6 +7,7 @@ from PIL import Image, ImageDraw
 from modules import images, devices
 from modules.processing import Processed, process_images
 from modules.shared import opts, state
+import modules.soft_inpainting as si
 
 
 class Script(scripts.Script):
@@ -22,23 +23,19 @@ class Script(scripts.Script):
 
         pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels"))
         mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id=self.elem_id("mask_blur"))
-        mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power"))
-        mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id=self.elem_id("mask_blend_scale"))
-        inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id=self.elem_id("inpaint_detail_preservation"))
+        soft_inpainting = si.gradio_ui()[0]
         inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='fill', type="index", elem_id=self.elem_id("inpainting_fill"))
         direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction"))
 
-        return [pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction]
+        return [pixels, mask_blur, *soft_inpainting, inpainting_fill, direction]
 
-    def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction):
+    def run(self, p, pixels, mask_blur, mask_blend_enabled, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction):
         initial_seed = None
         initial_info = None
 
         p.mask_blur = mask_blur * 2
-        p.mask_blend_power = mask_blend_power
-        p.mask_blend_scale = mask_blend_scale
-        p.inpaint_detail_preservation = inpaint_detail_preservation
-
+        p.soft_inpainting = si.SoftInpaintingSettings(mask_blend_power, mask_blend_scale, inpaint_detail_preservation) \
+            if mask_blend_enabled else None
         p.inpainting_fill = inpainting_fill
         p.inpaint_full_res = False
 
diff --git a/test/test_img2img.py b/test/test_img2img.py
index 5cda2dba..87bd8509 100644
--- a/test/test_img2img.py
+++ b/test/test_img2img.py
@@ -1,6 +1,7 @@
 
 import pytest
 import requests
+import modules.soft_inpainting as si
 
 
 @pytest.fixture()
@@ -24,9 +25,10 @@ def simple_img2img_request(img2img_basic_image_base64):
         "inpainting_mask_invert": False,
         "mask": None,
         "mask_blur": 4,
-        "mask_blend_power": 1,
-        "mask_blend_scale": 0.5,
-        "inpaint_detail_preservation": 4,
+        "mask_blend_enabled": True,
+        "mask_blend_power": si.default.mask_blend_power,
+        "mask_blend_scale": si.default.mask_blend_scale,
+        "inpaint_detail_preservation": si.default.inpaint_detail_preservation,
         "n_iter": 1,
         "negative_prompt": "",
         "override_settings": {},
-- 
cgit v1.2.3


From 976c1053efeb5054692ed3cfa294cf79196f3946 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 16:06:58 -0700
Subject: Cleaned up code, moved main code contributions into
 soft_inpainting.py

---
 modules/processing.py               |  56 ++----------
 modules/sd_samplers_cfg_denoiser.py |  84 ++---------------
 modules/soft_inpainting.py          | 177 ++++++++++++++++++++++++++++++++----
 modules/ui.py                       |   7 --
 4 files changed, 174 insertions(+), 150 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/processing.py b/modules/processing.py
index b40b1a40..0b360387 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -892,55 +892,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
                 # Generate the mask(s) based on similarity between the original and denoised latent vectors
                 if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    # latent_mask = p.nmask[0].float().cpu()
-
-                    # convert the original mask into a form we use to scale distances for thresholding
-                    # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2))
-                    # mask_scalar = mask_scalar / (1.00001-mask_scalar)
-                    # mask_scalar = mask_scalar.numpy()
-
-                    latent_orig = p.init_latent
-                    latent_proc = samples_ddim
-                    latent_distance = torch.norm(latent_proc - latent_orig, p=2, dim=1)
-
-                    kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2)
-
-                    for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, p.overlay_images)):
-                        converted_mask = distance_map.float().cpu().numpy()
-                        converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center,
-                                                       percentile_min=0.9, percentile_max=1, min_width=1)
-                        converted_mask = images.weighted_histogram_filter(converted_mask,  kernel, kernel_center,
-                                                       percentile_min=0.25, percentile_max=0.75, min_width=1)
-
-                        # The distance at which opacity of original decreases to 50%
-                        # half_weighted_distance = 1  # * mask_scalar
-                        # converted_mask = converted_mask / half_weighted_distance
-
-                        converted_mask = 1 / (1 + converted_mask ** 2)
-                        converted_mask = images.smootherstep(converted_mask)
-                        converted_mask = 1 - converted_mask
-                        converted_mask = 255. * converted_mask
-                        converted_mask = converted_mask.astype(np.uint8)
-                        converted_mask = Image.fromarray(converted_mask)
-                        converted_mask = images.resize_image(2, converted_mask, p.width, p.height)
-                        converted_mask = create_binary_mask(converted_mask, round=False)
-
-                        # Remove aliasing artifacts using a gaussian blur.
-                        converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))
-
-                        # Expand the mask to fit the whole image if needed.
-                        if p.paste_to is not None:
-                            converted_mask = uncrop(converted_mask,
-                                                    (overlay_image.width, overlay_image.height),
-                                                    p.paste_to)
-
-                        p.masks_for_overlay[i] = converted_mask
-
-                        image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height))
-                        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
-                                           mask=ImageOps.invert(converted_mask.convert('L')))
-
-                        p.overlay_images[i] = image_masked.convert('RGBA')
+                    si.generate_adaptive_masks(latent_orig=p.init_latent,
+                                               latent_processed=samples_ddim,
+                                               overlay_images=p.overlay_images,
+                                               masks_for_overlay=p.masks_for_overlay,
+                                               width=p.width,
+                                               height=p.height,
+                                               paste_to=p.paste_to)
 
                 x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim,
                                                      target_device=devices.cpu,
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 0ee0b7dd..a700e692 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -94,76 +94,6 @@ class CFGDenoiser(torch.nn.Module):
         self.sampler.sampler_extra_args['uncond'] = uc
 
     def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
-        def latent_blend(a, b, t, one_minus_t=None):
-
-            """
-            Interpolates two latent image representations according to the parameter t,
-            where the interpolated vectors' magnitudes are also interpolated separately.
-            The "detail_preservation" factor biases the magnitude interpolation towards
-            the larger of the two magnitudes.
-            """
-            # NOTE: We use inplace operations wherever possible.
-
-            if one_minus_t is None:
-                one_minus_t = 1 - t
-
-            if self.soft_inpainting is None:
-                return a * one_minus_t + b * t
-
-            # Linearly interpolate the image vectors.
-            a_scaled = a * one_minus_t
-            b_scaled = b * t
-            image_interp = a_scaled
-            image_interp.add_(b_scaled)
-            result_type = image_interp.dtype
-            del a_scaled, b_scaled
-
-            # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.)
-            # 64-bit operations are used here to allow large exponents.
-            current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001)
-
-            # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
-            a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * one_minus_t
-            b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * t
-            desired_magnitude = a_magnitude
-            desired_magnitude.add_(b_magnitude).pow_(1 / self.soft_inpainting.inpaint_detail_preservation)
-            del a_magnitude, b_magnitude, one_minus_t
-
-            # Change the linearly interpolated image vectors' magnitudes to the value we want.
-            # This is the last 64-bit operation.
-            image_interp_scaling_factor = desired_magnitude
-            image_interp_scaling_factor.div_(current_magnitude)
-            image_interp_scaled = image_interp
-            image_interp_scaled.mul_(image_interp_scaling_factor)
-            del current_magnitude
-            del desired_magnitude
-            del image_interp
-            del image_interp_scaling_factor
-
-            image_interp_scaled = image_interp_scaled.to(result_type)
-            del result_type
-
-            return image_interp_scaled
-
-        def get_modified_nmask(nmask, _sigma):
-            """
-            Converts a negative mask representing the transparency of the original latent vectors being overlayed
-            to a mask that is scaled according to the denoising strength for this step.
-
-            Where:
-                0 = fully opaque, infinite density, fully masked
-                1 = fully transparent, zero density, fully unmasked
-
-            We bring this transparency to a power, as this allows one to simulate N number of blending operations
-            where N can be any positive real value. Using this one can control the balance of influence between
-            the denoiser and the original latents according to the sigma value.
-
-            NOTE: "mask" is not used
-            """
-            if self.soft_inpainting is None:
-                return nmask
-
-            return torch.pow(nmask, (_sigma ** self.soft_inpainting.mask_blend_power) * self.soft_inpainting.mask_blend_scale)
 
         if state.interrupted or state.skipped:
             raise sd_samplers_common.InterruptedException
@@ -184,9 +114,12 @@ class CFGDenoiser(torch.nn.Module):
         # Blend in the original latents (before)
         if self.mask_before_denoising and self.mask is not None:
             if self.soft_inpainting is None:
-                x = latent_blend(self.init_latent, x, self.nmask, self.mask)
+                x = self.init_latent * self.mask + self.nmask * x
             else:
-                x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma))
+                x = si.latent_blend(self.soft_inpainting,
+                                    self.init_latent,
+                                    x,
+                                    si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma))
 
         batch_size = len(conds_list)
         repeats = [len(conds_list[i]) for i in range(batch_size)]
@@ -290,9 +223,12 @@ class CFGDenoiser(torch.nn.Module):
         # Blend in the original latents (after)
         if not self.mask_before_denoising and self.mask is not None:
             if self.soft_inpainting is None:
-                denoised = latent_blend(self.init_latent, denoised, self.nmask, self.mask)
+                denoised = self.init_latent * self.mask + self.nmask * denoised
             else:
-                denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma))
+                denoised = si.latent_blend(self.soft_inpainting,
+                                           self.init_latent,
+                                           denoised,
+                                           si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma))
 
         self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
 
diff --git a/modules/soft_inpainting.py b/modules/soft_inpainting.py
index 259c36ec..b81c8dd9 100644
--- a/modules/soft_inpainting.py
+++ b/modules/soft_inpainting.py
@@ -4,13 +4,6 @@ class SoftInpaintingSettings:
         self.mask_blend_scale = mask_blend_scale
         self.inpaint_detail_preservation = inpaint_detail_preservation
 
-    def get_paste_fields(self):
-        return [
-            (self.mask_blend_power, gen_param_labels.mask_blend_power),
-            (self.mask_blend_scale, gen_param_labels.mask_blend_scale),
-            (self.inpaint_detail_preservation, gen_param_labels.inpaint_detail_preservation),
-        ]
-
     def add_generation_params(self, dest):
         dest[enabled_gen_param_label] = True
         dest[gen_param_labels.mask_blend_power] = self.mask_blend_power
@@ -18,25 +11,169 @@ class SoftInpaintingSettings:
         dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation
 
 
+# ------------------- Methods -------------------
+
+
+def latent_blend(soft_inpainting, a, b, t):
+    """
+    Interpolates two latent image representations according to the parameter t,
+    where the interpolated vectors' magnitudes are also interpolated separately.
+    The "detail_preservation" factor biases the magnitude interpolation towards
+    the larger of the two magnitudes.
+
+    Args:
+        soft_inpainting: settings object; only inpaint_detail_preservation is read
+            (used as the exponent of the magnitude interpolation, must be non-zero).
+        a: latent tensor weighted by (1 - t); dim 1 is treated as the vector axis.
+        b: latent tensor weighted by t; dim 1 is treated as the vector axis.
+        t: blend weight(s), broadcastable against a and b.
+
+    Returns:
+        A new tensor with the dtype of the interpolated latents; a and b are not modified.
+    """
+    import torch
+
+    # NOTE: In-place operations are used wherever possible to limit temporaries.
+    detail_preservation = soft_inpainting.inpaint_detail_preservation
+    one_minus_t = 1 - t
+
+    # Linearly interpolate the image vectors.
+    image_interp = a * one_minus_t
+    image_interp.add_(b * t)
+    result_type = image_interp.dtype
+
+    # Magnitude of the interpolated vectors (this magnitude will be divided out).
+    # 64-bit operations allow large exponents; the epsilon avoids a division by zero.
+    # keepdim=True keeps the reduced axis so that the norms broadcast against t and
+    # the latents for any batch size, not only when the batch size is 1.
+    current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001)
+
+    # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
+    a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_(detail_preservation) * one_minus_t
+    b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_(detail_preservation) * t
+    desired_magnitude = a_magnitude.add_(b_magnitude).pow_(1 / detail_preservation)
+    del a_magnitude, b_magnitude, one_minus_t
+
+    # Rescale the interpolated vectors to the desired magnitude.
+    # The division is the last 64-bit operation; the factor is cast back before the multiply.
+    scaling_factor = desired_magnitude.div_(current_magnitude).to(result_type)
+    del current_magnitude, desired_magnitude
+
+    return image_interp.mul_(scaling_factor)
+
+
+def get_modified_nmask(soft_inpainting, nmask, sigma):
+    """
+    Rescales a negative mask (the transparency of the original latents that are overlaid
+    on the denoised latents) according to the sigma value of the current step.
+
+    Mask values are read as:
+        0 = fully opaque, infinite density, fully masked
+        1 = fully transparent, zero density, fully unmasked
+
+    The transparency is raised to the power (sigma ** mask_blend_power) * mask_blend_scale.
+    Raising it to a power N simulates N successive blending operations, N being any
+    positive real value, which balances the denoiser against the original latents per sigma.
+    """
+    import torch
+    blend_power, blend_scale = soft_inpainting.mask_blend_power, soft_inpainting.mask_blend_scale
+    exponent = (sigma ** blend_power) * blend_scale
+    return torch.pow(nmask, exponent)
+
+
+def generate_adaptive_masks(
+        latent_orig,
+        latent_processed,
+        overlay_images,
+        masks_for_overlay,
+        width, height,
+        paste_to):
+    """
+    Rebuilds the overlay masks from the distance between the processed and the original latents.
+
+    For every (distance map, overlay image) pair a new mask is stored in masks_for_overlay[i] and
+    overlay_images[i] is replaced by the overlay image with that mask applied. Returns None.
+
+    Args:
+        latent_orig, latent_processed: latent tensors; the norm of their difference reduces dim 1.
+        overlay_images, masks_for_overlay: indexable containers that are updated in place.
+        width, height, paste_to: size the mask is resized to, and the region passed to proc.uncrop.
+    """
+    import torch
+    import numpy as np
+    import modules.processing as proc
+    import modules.images as images
+    from PIL import Image, ImageOps, ImageFilter
+
+    # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control.
+
+    # L2 distance (over dim 1) between the processed and the original latents.
+    latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1)
+    kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2)
+
+    def filtered(values, percentile_min, percentile_max):
+        return images.weighted_histogram_filter(values, kernel, kernel_center,
+                                                percentile_min=percentile_min,
+                                                percentile_max=percentile_max, min_width=1)
+
+    for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)):
+        distances = filtered(distance_map.float().cpu().numpy(), 0.9, 1)
+        distances = filtered(distances, 0.25, 0.75)
+
+        # 1 / (1 + d ** 2) falls from 1 towards 0 as the distance d grows; it is inverted after smoothing.
+        opacity = 1 - images.smootherstep(1 / (1 + distances ** 2))
+        mask_image = Image.fromarray((255. * opacity).astype(np.uint8))
+        mask_image = images.resize_image(2, mask_image, width, height)
+        mask_image = proc.create_binary_mask(mask_image, round=False)
+
+        # Remove aliasing artifacts using a gaussian blur.
+        mask_image = mask_image.filter(ImageFilter.GaussianBlur(radius=4))
+
+        # Expand the mask to fit the whole image if needed.
+        overlay_size = (overlay_image.width, overlay_image.height)
+        if paste_to is not None:
+            mask_image = proc.uncrop(mask_image, overlay_size, paste_to)
+
+        masks_for_overlay[i] = mask_image
+
+        # Paste the overlay image through the inverted mask onto an empty 'RGBa' canvas.
+        masked_overlay = Image.new('RGBa', overlay_size)
+        masked_overlay.paste(overlay_image.convert("RGBA").convert("RGBa"),
+                             mask=ImageOps.invert(mask_image.convert('L')))
+        overlay_images[i] = masked_overlay.convert('RGBA')
+
+
+# ------------------- Constants -------------------
+
+
+default = SoftInpaintingSettings(1, 0.5, 4)
+
 enabled_ui_label = "Soft inpainting"
 enabled_gen_param_label = "Soft inpainting enabled"
 enabled_el_id = "soft_inpainting_enabled"
 
-default = SoftInpaintingSettings(1, 0.5, 4)
-ui_labels = SoftInpaintingSettings("Schedule bias", "Preservation strength", "Transition contrast boost")
+ui_labels = SoftInpaintingSettings(
+    "Schedule bias",
+    "Preservation strength",
+    "Transition contrast boost")
 
 ui_info = SoftInpaintingSettings(
-    mask_blend_power="Shifts when preservation of original content occurs during denoising.",
-                     # "Below 1: Stronger preservation near the end (with low sigma)\n"
-                     # "1: Balanced (proportional to sigma)\n"
-                     # "Above 1: Stronger preservation in the beginning (with high sigma)",
-    mask_blend_scale="How strongly partially masked content should be preserved.",
-                     # "Low values: Favors generated content.\n"
-                     # "High values: Favors original content.",
-    inpaint_detail_preservation="Amplifies the contrast that may be lost in partially masked regions.")
-
-gen_param_labels = SoftInpaintingSettings("Soft inpainting schedule bias", "Soft inpainting preservation strength", "Soft inpainting transition contrast boost")
-el_ids = SoftInpaintingSettings("mask_blend_power", "mask_blend_scale", "inpaint_detail_preservation")
+    "Shifts when preservation of original content occurs during denoising.",
+    "How strongly partially masked content should be preserved.",
+    "Amplifies the contrast that may be lost in partially masked regions.")
+
+gen_param_labels = SoftInpaintingSettings(
+    "Soft inpainting schedule bias",
+    "Soft inpainting preservation strength",
+    "Soft inpainting transition contrast boost")
+
+el_ids = SoftInpaintingSettings(
+    "mask_blend_power",
+    "mask_blend_scale",
+    "inpaint_detail_preservation")
+
+
+# ------------------- UI -------------------
 
 
 def gradio_ui():
diff --git a/modules/ui.py b/modules/ui.py
index 0e4fb17a..4f1265a3 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -683,13 +683,6 @@ def create_ui():
                             with FormRow():
                                 soft_inpainting = si.gradio_ui()
 
-
-                            """
-                                mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power")
-                                mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id="img2img_mask_blend_scale")
-                                inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id="img2img_mask_blend_offset")
-                            """
-
                             with FormRow():
                                 inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
 
-- 
cgit v1.2.3


From e90d4334ad37024a802f4ef27069b625a6508f72 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Wed, 6 Dec 2023 16:54:42 -0700
Subject: A custom blending function can be provided by p, replacing the use of
 soft_inpainting.

---
 modules/sd_samplers_cfg_denoiser.py | 34 +++++++++++++++++-----------------
 modules/sd_samplers_common.py       |  1 -
 2 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index a700e692..f13e8dcc 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -6,7 +6,6 @@ import modules.shared as shared
 from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback
 from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback
 from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback
-import modules.soft_inpainting as si
 
 
 def catenate_conds(conds):
@@ -44,7 +43,6 @@ class CFGDenoiser(torch.nn.Module):
         self.model_wrap = None
         self.mask = None
         self.nmask = None
-        self.soft_inpainting: si.SoftInpaintingParameters = None
         self.init_latent = None
         self.steps = None
         """number of steps as specified by user in UI"""
@@ -94,7 +92,6 @@ class CFGDenoiser(torch.nn.Module):
         self.sampler.sampler_extra_args['uncond'] = uc
 
     def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
-
         if state.interrupted or state.skipped:
             raise sd_samplers_common.InterruptedException
 
@@ -111,15 +108,24 @@ class CFGDenoiser(torch.nn.Module):
 
         assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
 
+        # If we use masks, blending between the denoised and original latent images occurs here.
+        def apply_blend(latent):
+            custom_blend = getattr(self.p, "denoiser_masked_blend_function", None)
+            if not callable(custom_blend):
+                # No blend function provided by p: plain linear blend with the mask pair.
+                return self.init_latent * self.mask + self.nmask * latent
+
+            # Using an argument dictionary so that arguments can be added without breaking extensions.
+            blend_args = {
+                "denoiser": self,
+                "current_latent": latent,
+                "sigma": sigma,
+            }
+            return custom_blend(self, args=blend_args)
+
         # Blend in the original latents (before)
         if self.mask_before_denoising and self.mask is not None:
-            if self.soft_inpainting is None:
-                x = self.init_latent * self.mask + self.nmask * x
-            else:
-                x = si.latent_blend(self.soft_inpainting,
-                                    self.init_latent,
-                                    x,
-                                    si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma))
+            x = apply_blend(x)
 
         batch_size = len(conds_list)
         repeats = [len(conds_list[i]) for i in range(batch_size)]
@@ -222,13 +228,7 @@ class CFGDenoiser(torch.nn.Module):
 
         # Blend in the original latents (after)
         if not self.mask_before_denoising and self.mask is not None:
-            if self.soft_inpainting is None:
-                denoised = self.init_latent * self.mask + self.nmask * denoised
-            else:
-                denoised = si.latent_blend(self.soft_inpainting,
-                                           self.init_latent,
-                                           denoised,
-                                           si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma))
+            denoised = apply_blend(denoised)
 
         self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
 
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 9682bee3..58efcad2 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -277,7 +277,6 @@ class Sampler:
         self.model_wrap_cfg.p = p
         self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None
         self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
-        self.model_wrap_cfg.soft_inpainting = p.soft_inpainting if hasattr(p, 'soft_inpainting') else None
         self.model_wrap_cfg.step = 0
         self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
         self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0)
-- 
cgit v1.2.3


From ac4578912395627731f2cd8529f87a95df1f7644 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Wed, 6 Dec 2023 21:16:27 -0700
Subject: Removed soft inpainting, added hooks for softpainting to work
 instead.

---
 modules/processing.py               | 94 +++++++++++++++----------------------
 modules/scripts.py                  | 70 +++++++++++++++++++++++++++
 modules/sd_samplers_cfg_denoiser.py | 23 ++++-----
 3 files changed, 118 insertions(+), 69 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/processing.py b/modules/processing.py
index 7d46949f..5a1a90af 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -30,7 +30,6 @@ import modules.sd_models as sd_models
 import modules.sd_vae as sd_vae
 from ldm.data.util import AddMiDaS
 from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
-import modules.soft_inpainting as si
 
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
@@ -73,12 +72,10 @@ def uncrop(image, dest_size, paste_loc):
     return image
 
 
-def apply_overlay(image, paste_loc, index, overlays):
-    if overlays is None or index >= len(overlays):
+def apply_overlay(image, paste_loc, overlay):
+    if overlay is None:
         return image
 
-    overlay = overlays[index]
-
     if paste_loc is not None:
         image = uncrop(image, (overlay.width, overlay.height), paste_loc)
 
@@ -150,7 +147,6 @@ class StableDiffusionProcessing:
     do_not_save_grid: bool = False
     extra_generation_params: dict[str, Any] = None
     overlay_images: list = None
-    masks_for_overlay: list = None
     eta: float = None
     do_not_reload_embeddings: bool = False
     denoising_strength: float = None
@@ -880,31 +876,17 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
+            if p.scripts is not None:
+                ps = scripts.PostSampleArgs(samples_ddim)
+                p.scripts.post_sample(p, ps)
+                samples_ddim = ps.samples  # read the result back from the object the scripts received
+
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
-                # todo: generate adaptive masks based on pixel differences.
-                if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    si.apply_masks(soft_inpainting=p.soft_inpainting,
-                                   nmask=p.nmask,
-                                   overlay_images=p.overlay_images,
-                                   masks_for_overlay=p.masks_for_overlay,
-                                   width=p.width,
-                                   height=p.height,
-                                   paste_to=p.paste_to)
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
 
-                # Generate the mask(s) based on similarity between the original and denoised latent vectors
-                if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    si.apply_adaptive_masks(latent_orig=p.init_latent,
-                                            latent_processed=samples_ddim,
-                                            overlay_images=p.overlay_images,
-                                            masks_for_overlay=p.masks_for_overlay,
-                                            width=p.width,
-                                            height=p.height,
-                                            paste_to=p.paste_to)
-
                 x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -955,9 +937,18 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     pp = scripts.PostprocessImageArgs(image)
                     p.scripts.postprocess_image(p, pp)
                     image = pp.image
+
+                # NOTE(review): the code this replaces guarded the lookup with hasattr, so p presumably may lack the attribute.
+                mask_for_overlay = getattr(p, "mask_for_overlay", None)
+                overlay_image = p.overlay_images[i] if p.overlay_images is not None and i < len(p.overlay_images) else None
+                if p.scripts is not None:
+                    ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image)
+                    p.scripts.postprocess_maskoverlay(p, ppmo)
+                    mask_for_overlay, overlay_image = ppmo.mask_for_overlay, ppmo.overlay_image
+
                 if p.color_corrections is not None and i < len(p.color_corrections):
                     if save_samples and opts.save_images_before_color_correction:
-                        image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images)
+                        image_without_cc = apply_overlay(image, p.paste_to, overlay_image)
                         images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-before-color-correction")
                     image = apply_color_correction(p.color_corrections[i], image)
 
@@ -968,9 +959,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 original_denoised_image = image.copy()
 
                 if p.paste_to is not None:
-                    original_denoised_image = uncrop(original_denoised_image, (p.overlay_images[i].width, p.overlay_images[i].height), p.paste_to)
+                    original_denoised_image = uncrop(original_denoised_image, (overlay_image.width, overlay_image.height), p.paste_to)
 
-                image = apply_overlay(image, p.paste_to, i, p.overlay_images)
+                image = apply_overlay(image, p.paste_to, overlay_image)
 
                 if save_samples:
                     images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p)
@@ -981,13 +972,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     image.info["parameters"] = text
                 output_images.append(image)
 
-                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
-                    mask_for_overlay = p.mask_for_overlay
-                elif hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and p.masks_for_overlay[i]:
-                    mask_for_overlay = p.masks_for_overlay[i]
-                else:
-                    mask_for_overlay = None
-
                 if mask_for_overlay is not None:
                     if opts.return_mask or opts.save_mask:
                         image_mask = mask_for_overlay.convert('RGB')
@@ -1401,7 +1385,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_x: int = 4
     mask_blur_y: int = 4
     mask_blur: int = None
-    soft_inpainting: si.SoftInpaintingParameters = si.default
+    mask_round: bool = True
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
@@ -1447,7 +1431,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if image_mask is not None:
             # image_mask is passed in as RGBA by Gradio to support alpha masks,
             # but we still want to support binary masks.
-            image_mask = create_binary_mask(image_mask, round=(self.soft_inpainting is None))
+            image_mask = create_binary_mask(image_mask, round=self.mask_round)
 
             if self.inpainting_mask_invert:
                 image_mask = ImageOps.invert(image_mask)
@@ -1465,7 +1449,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = Image.fromarray(np_mask)
 
             if self.inpaint_full_res:
-                self.mask_for_overlay = image_mask if self.soft_inpainting is None else None
+                self.mask_for_overlay = image_mask
                 mask = image_mask.convert('L')
                 crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
                 crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
@@ -1476,13 +1460,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 self.paste_to = (x1, y1, x2-x1, y2-y1)
             else:
                 image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
+                np_mask = np.array(image_mask)
+                np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
+                self.mask_for_overlay = Image.fromarray(np_mask)
 
-                if self.soft_inpainting is None:
-                    np_mask = np.array(image_mask)
-                    np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
-                    self.mask_for_overlay = Image.fromarray(np_mask)
-
-            self.masks_for_overlay = [] if self.soft_inpainting is not None else None
             self.overlay_images = []
 
         latent_mask = self.latent_mask if self.latent_mask is not None else image_mask
@@ -1504,15 +1485,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image = images.resize_image(self.resize_mode, image, self.width, self.height)
 
             if image_mask is not None:
-                if self.soft_inpainting is not None:
-                    # We apply the masks AFTER to adjust mask based on changed content.
-                    self.overlay_images.append(image.convert('RGBA'))
-                    self.masks_for_overlay.append(image_mask)
-                else:
-                    image_masked = Image.new('RGBa', (image.width, image.height))
-                    image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
+                image_masked = Image.new('RGBa', (image.width, image.height))
+                image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
 
-                    self.overlay_images.append(image_masked.convert('RGBA'))
+                self.overlay_images.append(image_masked.convert('RGBA'))
 
             # crop_region is not None if we are doing inpaint full res
             if crop_region is not None:
@@ -1565,7 +1541,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
             latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
             latmask = latmask[0]
-            if self.soft_inpainting is None:
+            if self.mask_round:
                 latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
 
@@ -1578,7 +1554,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
-        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.soft_inpainting is None)
+        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.mask_round)
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         x = self.rng.next()
@@ -1589,8 +1565,14 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
-        if self.mask is not None and self.soft_inpainting is None:
-            samples = samples * self.nmask + self.init_latent * self.mask
+        if self.mask is not None:  # mask may be None (the replaced code checked for it); then nothing is blended
+            blended_samples = samples * self.nmask + self.init_latent * self.mask
+            if self.scripts is not None:
+                # No denoiser is passed, so MaskBlendArgs marks this as the final blend.
+                mba = scripts.MaskBlendArgs(samples, self.nmask, self.init_latent, self.mask, blended_samples)
+                self.scripts.on_mask_blend(self, mba)
+                blended_samples = mba.blended_samples
+            samples = blended_samples
 
         del x
         devices.torch_gc()
diff --git a/modules/scripts.py b/modules/scripts.py
index 7f9454eb..92a07c56 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -11,11 +11,31 @@ from modules import shared, paths, script_callbacks, extensions, script_loading,
 
 AlwaysVisible = object()
 
+class MaskBlendArgs:
+    """Arguments handed to Script.on_mask_blend."""
+    def __init__(self, current_latent, nmask, init_latent, mask, blended_samples, denoiser=None, sigma=None):
+        # The two latents being blended, their masks, and the blend computed by the caller.
+        self.current_latent, self.init_latent = current_latent, init_latent
+        self.nmask, self.mask = nmask, mask
+        self.blended_samples = blended_samples
+
+        # A blend without a denoiser is flagged as the final blend.
+        self.denoiser, self.sigma = denoiser, sigma
+        self.is_final_blend = denoiser is None
+
+class PostSampleArgs:  # argument holder handed to Script.post_sample
+    def __init__(self, samples):
+        self.samples = samples  # the sampler output the hook is called with
 
 class PostprocessImageArgs:
     def __init__(self, image):
         self.image = image
 
+class PostProcessMaskOverlayArgs:  # argument holder handed to Script.postprocess_maskoverlay
+    def __init__(self, index, mask_for_overlay, overlay_image):
+        self.index = index  # loop index of the image being post-processed
+        self.mask_for_overlay = mask_for_overlay
+        self.overlay_image = overlay_image  # None when there is no overlay image for this index
 
 class PostprocessBatchListArgs:
     def __init__(self, images):
@@ -206,6 +226,25 @@ class Script:
 
         pass
 
+    def on_mask_blend(self, p, mba: MaskBlendArgs, *args):
+        """
+        Called in inpainting mode when the original content is blended with the inpainted content.
+        This is called at every step in the denoising process and once at the end.
+        If mba.is_final_blend is true, this is called for the final blending stage.
+        Otherwise, mba.denoiser and mba.sigma are defined and may be used to inform the procedure.
+        """
+
+        pass
+
+    def post_sample(self, p, ps: PostSampleArgs, *args):
+        """
+        Called after the samples have been generated,
+        but before they have been decoded by the VAE, if applicable.
+        Check getattr(ps.samples, 'already_decoded', False) to test if the images are decoded.
+        """
+
+        pass
+
     def postprocess_image(self, p, pp: PostprocessImageArgs, *args):
         """
         Called for every image after it has been generated.
@@ -213,6 +252,13 @@ class Script:
 
         pass
 
+    def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs, *args):
+        """
+        Called for every image after it has been generated; ppmo carries index, mask_for_overlay and overlay_image.
+        """
+
+        pass
     def postprocess(self, p, processed, *args):
         """
         This function is called after processing ends for AlwaysVisible scripts.
@@ -767,6 +813,22 @@ class ScriptRunner:
             except Exception:
                 errors.report(f"Error running postprocess_batch_list: {script.filename}", exc_info=True)
 
+    def post_sample(self, p, ps: PostSampleArgs):
+        for script in self.alwayson_scripts:
+            try:
+                script_args = p.script_args[script.args_from:script.args_to]
+                script.post_sample(p, ps, *script_args)
+            except Exception:
+                errors.report(f"Error running post_sample: {script.filename}", exc_info=True)
+
+    def on_mask_blend(self, p, mba: MaskBlendArgs):  # dispatches the on_mask_blend hook to every always-on script
+        for script in self.alwayson_scripts:
+            try:
+                script_args = p.script_args[script.args_from:script.args_to]  # this script's slice of the UI arguments
+                script.on_mask_blend(p, mba, *script_args)
+            except Exception:  # a failing script is reported and the remaining scripts still run
+                errors.report(f"Error running on_mask_blend: {script.filename}", exc_info=True)
+
     def postprocess_image(self, p, pp: PostprocessImageArgs):
         for script in self.alwayson_scripts:
             try:
@@ -775,6 +837,14 @@ class ScriptRunner:
             except Exception:
                 errors.report(f"Error running postprocess_image: {script.filename}", exc_info=True)
 
+    def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs):  # dispatches the hook to every always-on script
+        for script in self.alwayson_scripts:
+            try:
+                script_args = p.script_args[script.args_from:script.args_to]  # this script's slice of the UI arguments
+                script.postprocess_maskoverlay(p, ppmo, *script_args)
+            except Exception:  # a failing script is reported and the remaining scripts still run
+                errors.report(f"Error running postprocess_maskoverlay: {script.filename}", exc_info=True)
+
     def before_component(self, component, **kwargs):
         for callback, script in self.on_before_component_elem_id.get(kwargs.get("elem_id"), []):
             try:
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index f13e8dcc..eb9d5daf 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -109,19 +109,16 @@ class CFGDenoiser(torch.nn.Module):
         assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
 
         # If we use masks, blending between the denoised and original latent images occurs here.
-        def apply_blend(latent):
-            if hasattr(self.p, "denoiser_masked_blend_function") and callable(self.p.denoiser_masked_blend_function):
-                return self.p.denoiser_masked_blend_function(
-                    self,
-                    # Using an argument dictionary so that arguments can be added without breaking extensions.
-                    args=
-                    {
-                        "denoiser": self,
-                        "current_latent": latent,
-                        "sigma": sigma
-                    })
-            else:
-                return self.init_latent * self.mask + self.nmask * latent
+        def apply_blend(current_latent):  # returns current_latent blended with the original latents via mask/nmask
+            blended_latent = current_latent * self.nmask + self.init_latent * self.mask  # default blend; scripts may replace it
+
+            if self.p.scripts is not None:
+                from modules import scripts
+                mba = scripts.MaskBlendArgs(current_latent, self.nmask, self.init_latent, self.mask, blended_latent, denoiser=self, sigma=sigma)
+                self.p.scripts.on_mask_blend(self.p, mba)
+                blended_latent = mba.blended_samples  # MaskBlendArgs stores the (possibly script-modified) blend as 'blended_samples'
+
+            return blended_latent
 
         # Blend in the original latents (before)
         if self.mask_before_denoising and self.mask is not None:
-- 
cgit v1.2.3


From 425507bd10c55f1f804eb5015db74520668f46f9 Mon Sep 17 00:00:00 2001
From: continue-revolution <continuerevolution@gmail.com>
Date: Sun, 7 Jan 2024 10:25:01 -0600
Subject: add p to cfgdenoiserparams

---
 modules/script_callbacks.py         | 5 ++++-
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py
index 9ed7ad21..bb47c18d 100644
--- a/modules/script_callbacks.py
+++ b/modules/script_callbacks.py
@@ -41,7 +41,7 @@ class ExtraNoiseParams:
 
 
 class CFGDenoiserParams:
-    def __init__(self, x, image_cond, sigma, sampling_step, total_sampling_steps, text_cond, text_uncond):
+    def __init__(self, x, image_cond, sigma, sampling_step, total_sampling_steps, text_cond, text_uncond, p):
         self.x = x
         """Latent image representation in the process of being denoised"""
 
@@ -63,6 +63,9 @@ class CFGDenoiserParams:
         self.text_uncond = text_uncond
         """ Encoder hidden states of text conditioning from negative prompt"""
 
+        self.p = p
+        """StableDiffusionProcessing object with processing parameters"""
+
 
 class CFGDenoisedParams:
     def __init__(self, x, sampling_step, total_sampling_steps, inner_model):
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index eb9d5daf..f4ded6bd 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -146,7 +146,7 @@ class CFGDenoiser(torch.nn.Module):
             sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma])
             image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond] + [torch.zeros_like(self.init_latent)])
 
-        denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond)
+        denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self.p)
         cfg_denoiser_callback(denoiser_params)
         x_in = denoiser_params.x
         image_cond_in = denoiser_params.image_cond
-- 
cgit v1.2.3


From f56cebf5ba24313447b2204c3f804379767201c9 Mon Sep 17 00:00:00 2001
From: continue-revolution <continuerevolution@gmail.com>
Date: Sun, 7 Jan 2024 12:35:35 -0600
Subject: add self instead

---
 modules/script_callbacks.py         | 6 +++---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py
index bb47c18d..053dfc96 100644
--- a/modules/script_callbacks.py
+++ b/modules/script_callbacks.py
@@ -41,7 +41,7 @@ class ExtraNoiseParams:
 
 
 class CFGDenoiserParams:
-    def __init__(self, x, image_cond, sigma, sampling_step, total_sampling_steps, text_cond, text_uncond, p):
+    def __init__(self, x, image_cond, sigma, sampling_step, total_sampling_steps, text_cond, text_uncond, denoiser):
         self.x = x
         """Latent image representation in the process of being denoised"""
 
@@ -63,8 +63,8 @@ class CFGDenoiserParams:
         self.text_uncond = text_uncond
         """ Encoder hidden states of text conditioning from negative prompt"""
 
-        self.p = p
-        """StableDiffusionProcessing object with processing parameters"""
+        self.denoiser = denoiser
+        """Current CFGDenoiser object with processing parameters"""
 
 
 class CFGDenoisedParams:
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index f4ded6bd..6d76aa96 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -146,7 +146,7 @@ class CFGDenoiser(torch.nn.Module):
             sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma])
             image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond] + [torch.zeros_like(self.init_latent)])
 
-        denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self.p)
+        denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self)
         cfg_denoiser_callback(denoiser_params)
         x_in = denoiser_params.x
         image_cond_in = denoiser_params.image_cond
-- 
cgit v1.2.3


From 757dda9ade9d47cb2a755dad0475c8c4fbcaa114 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 27 Jan 2024 22:30:12 +0300
Subject: Add Pad conds v0 option

---
 modules/infotext_versions.py        |  5 ++-
 modules/sd_samplers_cfg_denoiser.py | 70 ++++++++++++++++++++++++++++++++-----
 modules/sd_samplers_common.py       |  7 ++++
 modules/sd_samplers_kdiffusion.py   |  6 ++--
 modules/sd_samplers_timesteps.py    |  6 ++--
 modules/shared_options.py           |  3 +-
 6 files changed, 78 insertions(+), 19 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/infotext_versions.py b/modules/infotext_versions.py
index a5afeebf..23b45c3f 100644
--- a/modules/infotext_versions.py
+++ b/modules/infotext_versions.py
@@ -31,9 +31,12 @@ def backcompat(d):
     if ver is None:
         return
 
-    if ver < v160:
+    if ver < v160 and '[' in d.get('Prompt', ''):
         d["Old prompt editing timelines"] = True
 
+    if ver < v160 and d.get('Sampler', '') in ('DDIM', 'PLMS'):
+        d["Pad conds v0"] = True
+
     if ver < v170_tsnr:
         d["Downcast alphas_cumprod"] = True
 
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 6d76aa96..ef237396 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -53,6 +53,7 @@ class CFGDenoiser(torch.nn.Module):
         self.step = 0
         self.image_cfg_scale = None
         self.padded_cond_uncond = False
+        self.padded_cond_uncond_v0 = False
         self.sampler = sampler
         self.model_wrap = None
         self.p = None
@@ -91,6 +92,62 @@ class CFGDenoiser(torch.nn.Module):
         self.sampler.sampler_extra_args['cond'] = c
         self.sampler.sampler_extra_args['uncond'] = uc
 
+    def pad_cond_uncond(self, cond, uncond):
+        empty = shared.sd_model.cond_stage_model_empty_prompt
+        num_repeats = (cond.shape[1] - cond.shape[1]) // empty.shape[1]
+
+        if num_repeats < 0:
+            cond = pad_cond(cond, -num_repeats, empty)
+            self.padded_cond_uncond = True
+        elif num_repeats > 0:
+            uncond = pad_cond(uncond, num_repeats, empty)
+            self.padded_cond_uncond = True
+
+        return cond, uncond
+
+    def pad_cond_uncond_v0(self, cond, uncond):
+        """
+        Pads the 'uncond' tensor to match the shape of the 'cond' tensor.
+
+        If 'uncond' is a dictionary, it is assumed that the 'crossattn' key holds the tensor to be padded.
+        If 'uncond' is a tensor, it is padded directly.
+
+        If the number of columns in 'uncond' is less than the number of columns in 'cond', the last column of 'uncond'
+        is repeated to match the number of columns in 'cond'.
+
+        If the number of columns in 'uncond' is greater than the number of columns in 'cond', 'uncond' is truncated
+        to match the number of columns in 'cond'.
+
+        Args:
+            cond (torch.Tensor or DictWithShape): The condition tensor whose shape 'uncond' is padded or truncated to match.
+            uncond (torch.Tensor or DictWithShape): The tensor to be padded, or a dictionary containing the tensor to be padded.
+
+        Returns:
+            tuple: A tuple containing the 'cond' tensor and the padded 'uncond' tensor.
+
+        Note:
+            This is the padding that was always used in DDIM before version 1.6.0
+        """
+
+        is_dict_cond = isinstance(uncond, dict)
+        uncond_vec = uncond['crossattn'] if is_dict_cond else uncond
+
+        if uncond_vec.shape[1] < cond.shape[1]:
+            last_vector = uncond_vec[:, -1:]
+            last_vector_repeated = last_vector.repeat([1, cond.shape[1] - uncond_vec.shape[1], 1])
+            uncond_vec = torch.hstack([uncond_vec, last_vector_repeated])
+            self.padded_cond_uncond_v0 = True
+        elif uncond_vec.shape[1] > cond.shape[1]:
+            uncond_vec = uncond_vec[:, :cond.shape[1]]
+            self.padded_cond_uncond_v0 = True
+
+        if is_dict_cond:
+            uncond['crossattn'] = uncond_vec
+        else:
+            uncond = uncond_vec
+
+        return cond, uncond
+
     def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
         if state.interrupted or state.skipped:
             raise sd_samplers_common.InterruptedException
@@ -162,16 +219,11 @@ class CFGDenoiser(torch.nn.Module):
             sigma_in = sigma_in[:-batch_size]
 
         self.padded_cond_uncond = False
+        self.padded_cond_uncond_v0 = False
         if shared.opts.pad_cond_uncond and tensor.shape[1] != uncond.shape[1]:
-            empty = shared.sd_model.cond_stage_model_empty_prompt
-            num_repeats = (tensor.shape[1] - uncond.shape[1]) // empty.shape[1]
-
-            if num_repeats < 0:
-                tensor = pad_cond(tensor, -num_repeats, empty)
-                self.padded_cond_uncond = True
-            elif num_repeats > 0:
-                uncond = pad_cond(uncond, num_repeats, empty)
-                self.padded_cond_uncond = True
+            tensor, uncond = self.pad_cond_uncond(tensor, uncond)
+        elif shared.opts.pad_cond_uncond_v0 and tensor.shape[1] != uncond.shape[1]:
+            tensor, uncond = self.pad_cond_uncond_v0(tensor, uncond)
 
         if tensor.shape[1] == uncond.shape[1] or skip_uncond:
             if is_edit_model:
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 58efcad2..6bd38e12 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -335,3 +335,10 @@ class Sampler:
 
     def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
         raise NotImplementedError()
+
+    def add_infotext(self, p):
+        if self.model_wrap_cfg.padded_cond_uncond:
+            p.extra_generation_params["Pad conds"] = True
+
+        if self.model_wrap_cfg.padded_cond_uncond_v0:
+            p.extra_generation_params["Pad conds v0"] = True
diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py
index 8a8c87e0..337106c0 100644
--- a/modules/sd_samplers_kdiffusion.py
+++ b/modules/sd_samplers_kdiffusion.py
@@ -187,8 +187,7 @@ class KDiffusionSampler(sd_samplers_common.Sampler):
 
         samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
 
-        if self.model_wrap_cfg.padded_cond_uncond:
-            p.extra_generation_params["Pad conds"] = True
+        self.add_infotext(p)
 
         return samples
 
@@ -234,8 +233,7 @@ class KDiffusionSampler(sd_samplers_common.Sampler):
 
         samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
 
-        if self.model_wrap_cfg.padded_cond_uncond:
-            p.extra_generation_params["Pad conds"] = True
+        self.add_infotext(p)
 
         return samples
 
diff --git a/modules/sd_samplers_timesteps.py b/modules/sd_samplers_timesteps.py
index 777dd8d0..8cc7d384 100644
--- a/modules/sd_samplers_timesteps.py
+++ b/modules/sd_samplers_timesteps.py
@@ -133,8 +133,7 @@ class CompVisSampler(sd_samplers_common.Sampler):
 
         samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
 
-        if self.model_wrap_cfg.padded_cond_uncond:
-            p.extra_generation_params["Pad conds"] = True
+        self.add_infotext(p)
 
         return samples
 
@@ -158,8 +157,7 @@ class CompVisSampler(sd_samplers_common.Sampler):
         }
         samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
 
-        if self.model_wrap_cfg.padded_cond_uncond:
-            p.extra_generation_params["Pad conds"] = True
+        self.add_infotext(p)
 
         return samples
 
diff --git a/modules/shared_options.py b/modules/shared_options.py
index fef1fb83..bdd066c4 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -210,7 +210,8 @@ options_templates.update(options_section(('optimizations', "Optimizations", "sd"
     "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
     "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
     "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"),
-    "pad_cond_uncond": OptionInfo(False, "Pad prompt/negative prompt to be same length", infotext='Pad conds').info("improves performance when prompt and negative prompt have different lengths; changes seeds"),
+    "pad_cond_uncond": OptionInfo(False, "Pad prompt/negative prompt", infotext='Pad conds').info("improves performance when prompt and negative prompt have different lengths; changes seeds"),
+    "pad_cond_uncond_v0": OptionInfo(False, "Pad prompt/negative prompt (v0)", infotext='Pad conds v0').info("alternative implementation for the above; used prior to 1.6.0 for DDIM sampler; ignored if the above is set; changes seeds"),
     "persistent_cond_cache": OptionInfo(True, "Persistent cond cache").info("do not recalculate conds from prompts if prompts have not changed since previous calculation"),
     "batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond comandline argument"),
     "fp8_storage": OptionInfo("Disable", "FP8 weight", gr.Radio, {"choices": ["Disable", "Enable for SDXL", "Enable"]}).info("Use FP8 to store Linear/Conv layers' weight. Require pytorch>=2.1.0."),
-- 
cgit v1.2.3


From baaf39b6f92f24275a1b264a634514bac571dfae Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 29 Jan 2024 10:20:27 +0300
Subject: fix the typo -- thanks Cyberbeing

---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index ef237396..941dff4b 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -94,7 +94,7 @@ class CFGDenoiser(torch.nn.Module):
 
     def pad_cond_uncond(self, cond, uncond):
         empty = shared.sd_model.cond_stage_model_empty_prompt
-        num_repeats = (cond.shape[1] - cond.shape[1]) // empty.shape[1]
+        num_repeats = (cond.shape[1] - uncond.shape[1]) // empty.shape[1]
 
         if num_repeats < 0:
             cond = pad_cond(cond, -num_repeats, empty)
-- 
cgit v1.2.3


From 3345218439ab0e74e2b6ea6e9d6291885a6e8fb5 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 17 Feb 2024 13:21:08 +0300
Subject: Update comment for Pad prompt/negative prompt v0 to add a warning
 about truncation, make it override the v1 implementation

---
 modules/sd_samplers_cfg_denoiser.py | 6 +++---
 modules/shared_options.py           | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'modules/sd_samplers_cfg_denoiser.py')

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 941dff4b..a73d3b03 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -220,10 +220,10 @@ class CFGDenoiser(torch.nn.Module):
 
         self.padded_cond_uncond = False
         self.padded_cond_uncond_v0 = False
-        if shared.opts.pad_cond_uncond and tensor.shape[1] != uncond.shape[1]:
-            tensor, uncond = self.pad_cond_uncond(tensor, uncond)
-        elif shared.opts.pad_cond_uncond_v0 and tensor.shape[1] != uncond.shape[1]:
+        if shared.opts.pad_cond_uncond_v0 and tensor.shape[1] != uncond.shape[1]:
             tensor, uncond = self.pad_cond_uncond_v0(tensor, uncond)
+        elif shared.opts.pad_cond_uncond and tensor.shape[1] != uncond.shape[1]:
+            tensor, uncond = self.pad_cond_uncond(tensor, uncond)
 
         if tensor.shape[1] == uncond.shape[1] or skip_uncond:
             if is_edit_model:
diff --git a/modules/shared_options.py b/modules/shared_options.py
index e1d11c8e..25b47aa1 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -211,7 +211,7 @@ options_templates.update(options_section(('optimizations', "Optimizations", "sd"
     "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
     "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"),
     "pad_cond_uncond": OptionInfo(False, "Pad prompt/negative prompt", infotext='Pad conds').info("improves performance when prompt and negative prompt have different lengths; changes seeds"),
-    "pad_cond_uncond_v0": OptionInfo(False, "Pad prompt/negative prompt (v0)", infotext='Pad conds v0').info("alternative implementation for the above; used prior to 1.6.0 for DDIM sampler; ignored if the above is set; changes seeds"),
+    "pad_cond_uncond_v0": OptionInfo(False, "Pad prompt/negative prompt (v0)", infotext='Pad conds v0').info("alternative implementation for the above; used prior to 1.6.0 for DDIM sampler; overrides the above if set; WARNING: truncates negative prompt if it's too long; changes seeds"),
     "persistent_cond_cache": OptionInfo(True, "Persistent cond cache").info("do not recalculate conds from prompts if prompts have not changed since previous calculation"),
     "batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond comandline argument"),
     "fp8_storage": OptionInfo("Disable", "FP8 weight", gr.Radio, {"choices": ["Disable", "Enable for SDXL", "Enable"]}).info("Use FP8 to store Linear/Conv layers' weight. Require pytorch>=2.1.0."),
-- 
cgit v1.2.3