From 5121846d34d74aee9b55d48d35c1559a710051b0 Mon Sep 17 00:00:00 2001
From: Won-Kyu Park <wkpark@gmail.com>
Date: Wed, 25 Oct 2023 21:37:55 +0900
Subject: call state.nextjob() before postprocess*()

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 40598f5c..70ad1ebe 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -886,6 +886,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             devices.torch_gc()
 
+            state.nextjob()
+
             if p.scripts is not None:
                 p.scripts.postprocess_batch(p, x_samples_ddim, batch_number=n)
 
@@ -958,8 +960,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             devices.torch_gc()
 
-            state.nextjob()
-
         if not infotexts:
             infotexts.append(Processed(p, []).infotext(p, 0))
 
-- 
cgit v1.2.3


From 5e80d9ee99c5899e5e2b130408ffb65a0585a62a Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Tue, 7 Nov 2023 11:33:16 +0300
Subject: fix pix2pix producing bad results

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 70ad1ebe..b0e240a4 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -296,7 +296,7 @@ class StableDiffusionProcessing:
         return conditioning
 
     def edit_image_conditioning(self, source_image):
-        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
+        conditioning_image = shared.sd_model.encode_first_stage(source_image).mode()
 
         return conditioning_image
 
-- 
cgit v1.2.3


From 294f8a514f982248cda1cafda30d35566f3a0321 Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Sat, 11 Nov 2023 23:28:12 +0900
Subject: add hyperTile

https://github.com/tfernd/HyperTile
---
 modules/processing.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b0e240a4..e2309534 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -799,6 +799,16 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
     infotexts = []
     output_images = []
+    unet_object = p.sd_model.model
+    vae_model = p.sd_model.first_stage_model
+    try:
+        from hyper_tile import split_attention, flush
+    except (ImportError, ModuleNotFoundError): # pip install git+https://github.com/tfernd/HyperTile@2ef64b2800d007d305755c33550537410310d7df 
+        split_attention = lambda *args, **kwargs: lambda x: x # return a no-op context manager
+        flush = lambda: None
+    import random
+    saved_rng_state = random.getstate()
+    random.seed(p.seed) # hyper_tile uses random, so we need to seed it
 
     with torch.no_grad(), p.sd_model.ema_scope():
         with devices.autocast():
@@ -866,15 +876,25 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
-                samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
+                # get largest tile size available, which is 2^x which is factor of gcd of p.width and p.height
+                gcd = math.gcd(p.width, p.height)
+                largest_tile_size_available = 1
+                while gcd % (largest_tile_size_available * 2) == 0:
+                    largest_tile_size_available *= 2
+                aspect_ratio = p.width / p.height
+                with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn):
+                    with split_attention(unet_object, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn):
+                        flush()
+                        samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-
-                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
+                with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn):
+                    flush()
+                    x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
@@ -980,6 +1000,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if opts.grid_save:
                 images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(use_main_prompt=True), short_filename=not opts.grid_extended_filename, p=p, grid=True)
 
+    random.setstate(saved_rng_state)
     if not p.disable_extra_networks and p.extra_network_data:
         extra_networks.deactivate(p, p.extra_network_data)
 
-- 
cgit v1.2.3


From b29fc6d4de8812b25c520a46676cda13c3fe64ca Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Sat, 11 Nov 2023 23:43:13 +0900
Subject: Implement Hypertile

Co-Authored-By: Kieran Hunt <kph@hotmail.ca>
---
 modules/processing.py | 65 ++++++++++++++++++++-------------------------------
 1 file changed, 25 insertions(+), 40 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index e2309534..e19a09a3 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -24,6 +24,7 @@ from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
 import modules.face_restoration
+from modules.hypertile import split_attention, set_hypertile_seed, largest_tile_size_available
 import modules.images as images
 import modules.styles
 import modules.sd_models as sd_models
@@ -799,17 +800,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
     infotexts = []
     output_images = []
-    unet_object = p.sd_model.model
-    vae_model = p.sd_model.first_stage_model
-    try:
-        from hyper_tile import split_attention, flush
-    except (ImportError, ModuleNotFoundError): # pip install git+https://github.com/tfernd/HyperTile@2ef64b2800d007d305755c33550537410310d7df 
-        split_attention = lambda *args, **kwargs: lambda x: x # return a no-op context manager
-        flush = lambda: None
-    import random
-    saved_rng_state = random.getstate()
-    random.seed(p.seed) # hyper_tile uses random, so we need to seed it
-
     with torch.no_grad(), p.sd_model.ema_scope():
         with devices.autocast():
             p.init(p.all_prompts, p.all_seeds, p.all_subseeds)
@@ -871,29 +861,20 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 p.comment(comment)
 
             p.extra_generation_params.update(model_hijack.extra_generation_params)
-
+            set_hypertile_seed(p.seed)
+            # add batch size + hypertile status to information to reproduce the run
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
-                # get largest tile size available, which is 2^x which is factor of gcd of p.width and p.height
-                gcd = math.gcd(p.width, p.height)
-                largest_tile_size_available = 1
-                while gcd % (largest_tile_size_available * 2) == 0:
-                    largest_tile_size_available *= 2
-                aspect_ratio = p.width / p.height
-                with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn):
-                    with split_attention(unet_object, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn):
-                        flush()
-                        samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
+                samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-                with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn):
-                    flush()
+                with split_attention(p.sd_model.first_stage_model, aspect_ratio = p.width / p.height, tile_size=min(largest_tile_size_available(p.width, p.height), 128), disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
                     x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -1000,7 +981,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if opts.grid_save:
                 images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(use_main_prompt=True), short_filename=not opts.grid_extended_filename, p=p, grid=True)
 
-    random.setstate(saved_rng_state)
     if not p.disable_extra_networks and p.extra_network_data:
         extra_networks.deactivate(p, p.extra_network_data)
 
@@ -1161,24 +1141,25 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
-
+        aspect_ratio = self.width / self.height
         x = self.rng.next()
-        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+        tile_size = largest_tile_size_available(self.width, self.height)
+        with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+            with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+                devices.torch_gc()
+                samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
         del x
-
         if not self.enable_hr:
             return samples
 
         if self.latent_scale_mode is None:
-            decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
+            with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+                decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
         else:
             decoded_samples = None
 
         with sd_models.SkipWritingToConfig():
             sd_models.reload_model_weights(info=self.hr_checkpoint_info)
-
-        devices.torch_gc()
-
         return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
 
     def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts):
@@ -1186,7 +1167,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples
 
         self.is_hr_pass = True
-
         target_width = self.hr_upscale_to_x
         target_height = self.hr_upscale_to_y
 
@@ -1264,18 +1244,19 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         if self.scripts is not None:
             self.scripts.before_hr(self)
-
-        samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
+        tile_size = largest_tile_size_available(target_width, target_height)
+        with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+            with split_attention(self.sd_model.model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=3, max_depth=1,scale_depth=True, disable=not opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+                samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
 
         self.sampler = None
         devices.torch_gc()
-
-        decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)
+        with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+            decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)
 
         self.is_hr_pass = False
-
         return decoded_samples
 
     def close(self):
@@ -1550,8 +1531,12 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.initial_noise_multiplier != 1.0:
             self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier
             x *= self.initial_noise_multiplier
-
-        samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
+        aspect_ratio = self.width / self.height
+        tile_size = largest_tile_size_available(self.width, self.height)
+        with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+            with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+                devices.torch_gc()
+                samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
         if self.mask is not None:
             samples = samples * self.nmask + self.init_latent * self.mask
-- 
cgit v1.2.3


From bcfaf3979a9f93e37c418b58c75b02d9570b4354 Mon Sep 17 00:00:00 2001
From: AngelBottomless <aria1th@naver.com>
Date: Thu, 16 Nov 2023 18:43:16 +0900
Subject: convert/add hypertile options

---
 modules/processing.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index e19a09a3..c622ff33 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -24,7 +24,7 @@ from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
 import modules.face_restoration
-from modules.hypertile import split_attention, set_hypertile_seed, largest_tile_size_available
+from modules.hypertile import set_hypertile_seed, largest_tile_size_available, hypertile_context_unet, hypertile_context_vae
 import modules.images as images
 import modules.styles
 import modules.sd_models as sd_models
@@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-                with split_attention(p.sd_model.first_stage_model, aspect_ratio = p.width / p.height, tile_size=min(largest_tile_size_available(p.width, p.height), 128), disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+                with hypertile_context_unet(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                     x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -1144,8 +1144,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         aspect_ratio = self.width / self.height
         x = self.rng.next()
         tile_size = largest_tile_size_available(self.width, self.height)
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
-            with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 devices.torch_gc()
                 samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
         del x
@@ -1153,7 +1153,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples
 
         if self.latent_scale_mode is None:
-            with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+            with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
                 decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
         else:
             decoded_samples = None
@@ -1245,15 +1245,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         if self.scripts is not None:
             self.scripts.before_hr(self)
         tile_size = largest_tile_size_available(target_width, target_height)
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
-            with split_attention(self.sd_model.model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=3, max_depth=1,scale_depth=True, disable=not opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+        aspect_ratio = self.width / self.height
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
 
         self.sampler = None
         devices.torch_gc()
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
             decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)
 
         self.is_hr_pass = False
@@ -1533,8 +1534,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             x *= self.initial_noise_multiplier
         aspect_ratio = self.width / self.height
         tile_size = largest_tile_size_available(self.width, self.height)
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
-            with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 devices.torch_gc()
                 samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
-- 
cgit v1.2.3


From c40be2252ab1c8c289562db208c5ac6618bd8545 Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Fri, 17 Nov 2023 09:22:27 +0900
Subject: Fix critical issue - unet apply

---
 modules/processing.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index c622ff33..2fda7f33 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-                with hypertile_context_unet(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
+                with hypertile_context_unet(p.sd_model.model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                     x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -1145,7 +1145,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         x = self.rng.next()
         tile_size = largest_tile_size_available(self.width, self.height)
         with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 devices.torch_gc()
                 samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
         del x
@@ -1247,7 +1247,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         tile_size = largest_tile_size_available(target_width, target_height)
         aspect_ratio = self.width / self.height
         with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
@@ -1535,7 +1535,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         aspect_ratio = self.width / self.height
         tile_size = largest_tile_size_available(self.width, self.height)
         with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 devices.torch_gc()
                 samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
-- 
cgit v1.2.3


From 97431f29feb17ffc96ca95e9b3efec87be9d8b3a Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Fri, 17 Nov 2023 10:05:28 +0900
Subject: fix double gc and decoding with unet context

---
 modules/processing.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 2fda7f33..36c2be5e 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-                with hypertile_context_unet(p.sd_model.model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
+                with hypertile_context_vae(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), opts=shared.opts):
                     x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -1146,11 +1146,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         tile_size = largest_tile_size_available(self.width, self.height)
         with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
             with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
-                devices.torch_gc()
                 samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
         del x
         if not self.enable_hr:
             return samples
+        devices.torch_gc()
 
         if self.latent_scale_mode is None:
             with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
@@ -1536,7 +1536,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         tile_size = largest_tile_size_available(self.width, self.height)
         with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
             with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
-                devices.torch_gc()
                 samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
         if self.mask is not None:
-- 
cgit v1.2.3


From d2e0c1ca132f4f0d98b77397a9f353d4ad8e7c4b Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 26 Nov 2023 10:51:45 +0300
Subject: rework hypertile into a built-in extension

---
 modules/processing.py | 37 +++++++++++++------------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 36c2be5e..ac58ef86 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -24,7 +24,6 @@ from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
 import modules.face_restoration
-from modules.hypertile import set_hypertile_seed, largest_tile_size_available, hypertile_context_unet, hypertile_context_vae
 import modules.images as images
 import modules.styles
 import modules.sd_models as sd_models
@@ -861,8 +860,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 p.comment(comment)
 
             p.extra_generation_params.update(model_hijack.extra_generation_params)
-            set_hypertile_seed(p.seed)
-            # add batch size + hypertile status to information to reproduce the run
+
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
@@ -874,8 +872,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-                with hypertile_context_vae(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), opts=shared.opts):
-                    x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
+                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
@@ -1141,25 +1138,23 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
-        aspect_ratio = self.width / self.height
+
         x = self.rng.next()
-        tile_size = largest_tile_size_available(self.width, self.height)
-        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
-                samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
         del x
+
         if not self.enable_hr:
             return samples
         devices.torch_gc()
 
         if self.latent_scale_mode is None:
-            with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-                decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
+            decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
         else:
             decoded_samples = None
 
         with sd_models.SkipWritingToConfig():
             sd_models.reload_model_weights(info=self.hr_checkpoint_info)
+
         return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
 
     def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts):
@@ -1244,18 +1239,15 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         if self.scripts is not None:
             self.scripts.before_hr(self)
-        tile_size = largest_tile_size_available(target_width, target_height)
-        aspect_ratio = self.width / self.height
-        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
-                samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
+
+        samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
 
         self.sampler = None
         devices.torch_gc()
-        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)
+
+        decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)
 
         self.is_hr_pass = False
         return decoded_samples
@@ -1532,11 +1524,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.initial_noise_multiplier != 1.0:
             self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier
             x *= self.initial_noise_multiplier
-        aspect_ratio = self.width / self.height
-        tile_size = largest_tile_size_available(self.width, self.height)
-        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
-            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
-                samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
+
+        samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
         if self.mask is not None:
             samples = samples * self.nmask + self.init_latent * self.mask
-- 
cgit v1.2.3


From dec791d35ddcd02ca33563d3d0355e05e45de8ad Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 15:05:01 -0700
Subject: Removed code which forces the inpainting mask to be 0 or 1. Now
 fractional values (e.g. 0.5) are accepted.

---
 modules/processing.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index e124e7f0..317458f5 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -83,7 +83,7 @@ def apply_overlay(image, paste_loc, index, overlays):
 
 def create_binary_mask(image):
     if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255):
-        image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
+        image = image.split()[-1].convert("L")
     else:
         image = image.convert('L')
     return image
@@ -319,9 +319,6 @@ class StableDiffusionProcessing:
                 conditioning_mask = np.array(image_mask.convert("L"))
                 conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
                 conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
-
-                # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
-                conditioning_mask = torch.round(conditioning_mask)
         else:
             conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:])
 
@@ -1504,7 +1501,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
             latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
             latmask = latmask[0]
-            latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
 
             self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype)
-- 
cgit v1.2.3


From bbba133f054706c3668b7d03b0e6d0afc15705db Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 15:09:43 -0700
Subject: Removed conflicting step that replaces the softly inpainted latents
 with a naive blend with the original latents.

---
 modules/processing.py | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 317458f5..ae894f1a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1523,9 +1523,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
-        if self.mask is not None:
-            samples = samples * self.nmask + self.init_latent * self.mask
-
         del x
         devices.torch_gc()
 
-- 
cgit v1.2.3


From a6e584645305c0a91a3d46f73546e191b249210f Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 16:13:42 -0700
Subject: Nerfs the aggressive post-processing step of overlaying the original
 image.

---
 modules/processing.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index ae894f1a..12e08e87 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1412,7 +1412,12 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = Image.fromarray(np_mask)
 
             if self.inpaint_full_res:
-                self.mask_for_overlay = image_mask
+                np_mask = np.array(image_mask).astype(np.float32)
+                np_mask /= 255
+                np_mask = 1-pow(1-np_mask, 100)
+                np_mask *= 255
+                np_mask = np.clip(np_mask, 0, 255).astype(np.uint8)
+                self.mask_for_overlay = Image.fromarray(np_mask)
                 mask = image_mask.convert('L')
                 crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
                 crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
@@ -1423,8 +1428,11 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 self.paste_to = (x1, y1, x2-x1, y2-y1)
             else:
                 image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
-                np_mask = np.array(image_mask)
-                np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
+                np_mask = np.array(image_mask).astype(np.float32)
+                np_mask /= 255
+                np_mask = 1-pow(1-np_mask, 100)
+                np_mask *= 255
+                np_mask = np.clip(np_mask, 0, 255).astype(np.uint8)
                 self.mask_for_overlay = Image.fromarray(np_mask)
 
             self.overlay_images = []
-- 
cgit v1.2.3


From debf836fcc8d9becc3da8b1a29e33f40b0d9ef3e Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 16:15:36 -0700
Subject: Added UI elements to control blending parameters.

---
 modules/processing.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 12e08e87..da4d6fda 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1349,6 +1349,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_x: int = 4
     mask_blur_y: int = 4
     mask_blur: int = None
+    mask_blend_power: float = 1
+    mask_blend_scale: float = 1
+    mask_blend_offset: float = 0
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
-- 
cgit v1.2.3


From c5c7fa06aae1ae9f8b6d29ae2da3874921d4729b Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 22:35:07 -0700
Subject: Added slider for detail preservation strength, removed largely
 needless offset parameter, changed labels in UI and for saving to/pasting
 data from PNG files.

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index da4d6fda..361e8b05 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1351,7 +1351,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur: int = None
     mask_blend_power: float = 1
     mask_blend_scale: float = 1
-    mask_blend_offset: float = 0
+    inpaint_detail_preservation: float = 16
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
-- 
cgit v1.2.3


From c7a1ff87207544dd4bcf3aefffa67a4a38678c16 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Tue, 28 Nov 2023 23:31:10 -0700
Subject: Tweaked default values.

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 361e8b05..92fdebad 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1350,8 +1350,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_y: int = 4
     mask_blur: int = None
     mask_blend_power: float = 1
-    mask_blend_scale: float = 1
-    inpaint_detail_preservation: float = 16
+    mask_blend_scale: float = 0.5
+    inpaint_detail_preservation: float = 4
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
-- 
cgit v1.2.3


From 6d0a8dcd892f7ad9b399fed6edbad6ede13c5f69 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Wed, 29 Nov 2023 17:42:07 -0500
Subject: Implement zero terminal SNR schedule option

---
 modules/processing.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index ac58ef86..c88eec70 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -863,6 +863,34 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
+                
+            def rescale_zero_terminal_snr_abar(alphas_cumprod):
+                alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+                # Store old values.
+                alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+                alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+                # Shift so the last timestep is zero.
+                alphas_bar_sqrt -= (alphas_bar_sqrt_T)
+
+                # Scale so the first timestep is back to the old value.
+                alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+                # Convert alphas_bar_sqrt to betas
+                alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+                alphas_bar[-1] = 4.8973451890853435e-08
+                return alphas_bar
+            
+            p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
+
+            if opts.use_downcasted_alpha_bar:
+                p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
+                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
+            if opts.sd_noise_schedule == "Zero Terminal SNR":
+                p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
+                print("rescaling noise schedule for zero snr")
+                p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
-- 
cgit v1.2.3


From ffa7f8201d849636bb327b3b40298e7c169ff204 Mon Sep 17 00:00:00 2001
From: catboxanon <122327233+catboxanon@users.noreply.github.com>
Date: Wed, 29 Nov 2023 18:10:43 -0500
Subject: Lint

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index c88eec70..f3883d5b 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -863,7 +863,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
-                
+
             def rescale_zero_terminal_snr_abar(alphas_cumprod):
                 alphas_bar_sqrt = alphas_cumprod.sqrt()
 
@@ -881,7 +881,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
                 alphas_bar[-1] = 4.8973451890853435e-08
                 return alphas_bar
-            
+
             p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
 
             if opts.use_downcasted_alpha_bar:
-- 
cgit v1.2.3


From de79597ab9894965e3702939b8536ec3dcc3c859 Mon Sep 17 00:00:00 2001
From: catboxanon <122327233+catboxanon@users.noreply.github.com>
Date: Wed, 29 Nov 2023 18:33:32 -0500
Subject: Only apply ztSNR related code if alphas_cumprod exists

---
 modules/processing.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index f3883d5b..7e73d7e2 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -882,15 +882,16 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 alphas_bar[-1] = 4.8973451890853435e-08
                 return alphas_bar
 
-            p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
-
-            if opts.use_downcasted_alpha_bar:
-                p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
-                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
-            if opts.sd_noise_schedule == "Zero Terminal SNR":
-                p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
-                print("rescaling noise schedule for zero snr")
-                p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
+            if hasattr(p.sd_model, 'alphas_cumprod') and hasattr(p.sd_model, 'alphas_cumprod_original'):
+                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
+
+                if opts.use_downcasted_alpha_bar:
+                    p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
+                    p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
+                if opts.sd_noise_schedule == "Zero Terminal SNR":
+                    p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
+                    print("rescaling noise schedule for zero snr")
+                    p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
-- 
cgit v1.2.3


From 668ae34e21df848ef4909b8b49c4142a3674701b Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Wed, 29 Nov 2023 22:48:31 -0500
Subject: remove debug print

---
 modules/processing.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 7e73d7e2..d73c8bfc 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -890,7 +890,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
                 if opts.sd_noise_schedule == "Zero Terminal SNR":
                     p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
-                    print("rescaling noise schedule for zero snr")
                     p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
-- 
cgit v1.2.3


From b58d061e41cba6fb91910d310d53e175d0511650 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 2 Dec 2023 08:33:28 +0300
Subject: infotext updates: add option to disregard certain infotext fields,
 add option to not include VAE in infotext, add explanation to infotext
 settings page, move some options to infotext settings page

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index ac58ef86..5ab6ddde 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -679,8 +679,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Size": f"{p.width}x{p.height}",
         "Model hash": p.sd_model_hash if opts.add_model_hash_to_info else None,
         "Model": p.sd_model_name if opts.add_model_name_to_info else None,
-        "VAE hash": p.sd_vae_hash if opts.add_model_hash_to_info else None,
-        "VAE": p.sd_vae_name if opts.add_model_name_to_info else None,
+        "VAE hash": p.sd_vae_hash if opts.add_vae_hash_to_info else None,
+        "VAE": p.sd_vae_name if opts.add_vae_name_to_info else None,
         "Variation seed": (None if p.subseed_strength == 0 else (p.all_subseeds[0] if use_main_prompt else all_subseeds[index])),
         "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
         "Seed resize from": (None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
-- 
cgit v1.2.3


From 309a606c2fa645b6b8623f96ea56117e685a47fb Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sat, 2 Dec 2023 13:07:45 -0500
Subject: ensure that original alpha bar always exists

---
 modules/processing.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index d73c8bfc..bfa59038 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -882,15 +882,17 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 alphas_bar[-1] = 4.8973451890853435e-08
                 return alphas_bar
 
-            if hasattr(p.sd_model, 'alphas_cumprod') and hasattr(p.sd_model, 'alphas_cumprod_original'):
-                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
-
-                if opts.use_downcasted_alpha_bar:
-                    p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
-                    p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
-                if opts.sd_noise_schedule == "Zero Terminal SNR":
-                    p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
-                    p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
+            if hasattr(p.sd_model, 'alphas_cumprod') and not hasattr(p.sd_model, 'alphas_cumprod_original'):
+                p.sd_model.alphas_cumprod_original = p.sd_model.alphas_cumprod
+                
+            p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
+
+            if opts.use_downcasted_alpha_bar:
+                p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
+                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
+            if opts.sd_noise_schedule == "Zero Terminal SNR":
+                p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
+                p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
-- 
cgit v1.2.3


From 81c4ddf6ebebe6f18338de3b0391da1d8521a525 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sat, 2 Dec 2023 13:11:00 -0500
Subject: fix linting

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index bfa59038..eeccea74 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -884,7 +884,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             if hasattr(p.sd_model, 'alphas_cumprod') and not hasattr(p.sd_model, 'alphas_cumprod_original'):
                 p.sd_model.alphas_cumprod_original = p.sd_model.alphas_cumprod
-                
+
             p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
 
             if opts.use_downcasted_alpha_bar:
-- 
cgit v1.2.3


From 83e8c322762c545fd589c060811379582926060f Mon Sep 17 00:00:00 2001
From: catboxanon <122327233+catboxanon@users.noreply.github.com>
Date: Sat, 2 Dec 2023 13:30:53 -0500
Subject: Fix `save_samples` being checked early when saving masked composite

---
 modules/processing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 5ab6ddde..4f265801 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -938,14 +938,14 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 if opts.enable_pnginfo:
                     image.info["parameters"] = text
                 output_images.append(image)
-                if save_samples and hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
+                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
                     image_mask = p.mask_for_overlay.convert('RGB')
                     image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
 
-                    if opts.save_mask:
+                    if save_samples and opts.save_mask:
                         images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
 
-                    if opts.save_mask_composite:
+                    if save_samples and opts.save_mask_composite:
                         images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
 
                     if opts.return_mask:
-- 
cgit v1.2.3


From 4a43334376d9e116f7a1446f042f9af9c0484fc6 Mon Sep 17 00:00:00 2001
From: drhead <runemaste644@gmail.com>
Date: Sat, 2 Dec 2023 14:05:42 -0500
Subject: Revert 309a606c

---
 modules/processing.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index eeccea74..d73c8bfc 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -882,17 +882,15 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 alphas_bar[-1] = 4.8973451890853435e-08
                 return alphas_bar
 
-            if hasattr(p.sd_model, 'alphas_cumprod') and not hasattr(p.sd_model, 'alphas_cumprod_original'):
-                p.sd_model.alphas_cumprod_original = p.sd_model.alphas_cumprod
-
-            p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
-
-            if opts.use_downcasted_alpha_bar:
-                p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
-                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
-            if opts.sd_noise_schedule == "Zero Terminal SNR":
-                p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
-                p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
+            if hasattr(p.sd_model, 'alphas_cumprod') and hasattr(p.sd_model, 'alphas_cumprod_original'):
+                p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod_original.to(shared.device)
+
+                if opts.use_downcasted_alpha_bar:
+                    p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
+                    p.sd_model.alphas_cumprod = p.sd_model.alphas_cumprod.half().to(shared.device)
+                if opts.sd_noise_schedule == "Zero Terminal SNR":
+                    p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
+                    p.sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(p.sd_model.alphas_cumprod).to(shared.device)
 
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
-- 
cgit v1.2.3


From 73ab982d1b7394574d1cf2e0a151bc457eeed769 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Sat, 2 Dec 2023 21:07:02 -0700
Subject: Blend masks are now produced afterward, based on an estimate of the
 visual difference between the original and modified latent images. This
 should remove ghosting and clipping artifacts from masks, while preserving
 the details of largely unchanged content.

---
 modules/processing.py | 119 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 90 insertions(+), 29 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 92fdebad..ad716e11 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
 
 import torch
 import numpy as np
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageFilter
 import random
 import cv2
 from skimage import exposure
@@ -62,6 +62,16 @@ def apply_color_correction(correction, original_image):
     return image.convert('RGB')
 
 
+def uncrop(image, dest_size, paste_loc):
+    x, y, w, h = paste_loc
+    base_image = Image.new('RGBA', dest_size)
+    image = images.resize_image(1, image, w, h)
+    base_image.paste(image, (x, y))
+    image = base_image
+
+    return image
+
+
 def apply_overlay(image, paste_loc, index, overlays):
     if overlays is None or index >= len(overlays):
         return image
@@ -69,11 +79,7 @@ def apply_overlay(image, paste_loc, index, overlays):
     overlay = overlays[index]
 
     if paste_loc is not None:
-        x, y, w, h = paste_loc
-        base_image = Image.new('RGBA', (overlay.width, overlay.height))
-        image = images.resize_image(1, image, w, h)
-        base_image.paste(image, (x, y))
-        image = base_image
+        image = uncrop(image, (overlay.width, overlay.height), paste_loc)
 
     image = image.convert('RGBA')
     image.alpha_composite(overlay)
@@ -140,6 +146,7 @@ class StableDiffusionProcessing:
     do_not_save_grid: bool = False
     extra_generation_params: dict[str, Any] = None
     overlay_images: list = None
+    masks_for_overlay: list = None
     eta: float = None
     do_not_reload_embeddings: bool = False
     denoising_strength: float = 0
@@ -865,11 +872,66 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
+                # todo: generate masks the old fashioned way
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
 
-                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
+                # Generate the mask(s) based on similarity between the original and denoised latent vectors
+                if getattr(p, "image_mask", None) is not None:
+                    # latent_mask = p.nmask[0].float().cpu()
+
+                    # convert the original mask into a form we use to scale distances for thresholding
+                    # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2))
+                    # mask_scalar = mask_scalar / (1.00001-mask_scalar)
+                    # mask_scalar = mask_scalar.numpy()
+
+                    latent_orig = p.init_latent
+                    latent_proc = samples_ddim
+                    latent_distance = torch.norm(latent_proc - latent_orig, p=2, dim=1)
+
+                    kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2)
+
+                    for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, p.overlay_images)):
+                        converted_mask = distance_map.float().cpu().numpy()
+                        converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center,
+                                                       percentile_min=0.9, percentile_max=1, min_width=1)
+                        converted_mask = images.weighted_histogram_filter(converted_mask,  kernel, kernel_center,
+                                                       percentile_min=0.25, percentile_max=0.75, min_width=1)
+
+                        # The distance at which opacity of original decreases to 50%
+                        # half_weighted_distance = 1  # * mask_scalar
+                        # converted_mask = converted_mask / half_weighted_distance
+
+                        converted_mask = 1 / (1 + converted_mask ** 2)
+                        converted_mask = images.smootherstep(converted_mask)
+                        converted_mask = 1 - converted_mask
+                        converted_mask = 255. * converted_mask
+                        converted_mask = converted_mask.astype(np.uint8)
+                        converted_mask = Image.fromarray(converted_mask)
+                        converted_mask = images.resize_image(2, converted_mask, p.width, p.height)
+                        converted_mask = create_binary_mask(converted_mask)
+
+                        # Remove aliasing artifacts using a gaussian blur.
+                        converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))
+
+                        # Expand the mask to fit the whole image if needed.
+                        if p.paste_to is not None:
+                            converted_mask = uncrop(converted_mask,
+                                                    (overlay_image.width, overlay_image.height),
+                                                    p.paste_to)
+
+                        p.masks_for_overlay[i] = converted_mask
+
+                        image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height))
+                        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
+                                           mask=ImageOps.invert(converted_mask.convert('L')))
+
+                        p.overlay_images[i] = image_masked.convert('RGBA')
+
+                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim,
+                                                     target_device=devices.cpu,
+                                                     check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
@@ -892,7 +954,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 x_samples_ddim = batch_params.images
 
             def infotext(index=0, use_main_prompt=False):
-                return create_infotext(p, p.prompts, p.seeds, p.subseeds, use_main_prompt=use_main_prompt, index=index, all_negative_prompts=p.negative_prompts)
+                return create_infotext(p, p.prompts, p.seeds, p.subseeds,
+                                       use_main_prompt=use_main_prompt, index=index,
+                                       all_negative_prompts=p.negative_prompts)
 
             save_samples = p.save_samples()
 
@@ -923,19 +987,27 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                         images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-before-color-correction")
                     image = apply_color_correction(p.color_corrections[i], image)
 
+                # If the intention is to show the output from the model
+                # that is being composited over the original image,
+                # we need to keep the original image around
+                # and use it in the composite step.
+                original_denoised_image = image.copy()
                 image = apply_overlay(image, p.paste_to, i, p.overlay_images)
 
                 if save_samples:
-                    images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p)
+                    images.save_image(image, p.outpath_samples, "", p.seeds[i],
+                                      p.prompts[i], opts.samples_format, info=infotext(i), p=p)
 
                 text = infotext(i)
                 infotexts.append(text)
                 if opts.enable_pnginfo:
                     image.info["parameters"] = text
                 output_images.append(image)
-                if save_samples and hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
-                    image_mask = p.mask_for_overlay.convert('RGB')
-                    image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
+                if save_samples and hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
+                    image_mask = p.masks_for_overlay[i].convert('RGB')
+                    image_mask_composite = Image.composite(
+                        original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size),
+                        images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA')
 
                     if opts.save_mask:
                         images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
@@ -1364,7 +1436,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     nmask: torch.Tensor = field(default=None, init=False)
     image_conditioning: torch.Tensor = field(default=None, init=False)
     init_img_hash: str = field(default=None, init=False)
-    mask_for_overlay: Image = field(default=None, init=False)
     init_latent: torch.Tensor = field(default=None, init=False)
 
     def __post_init__(self):
@@ -1415,12 +1486,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = Image.fromarray(np_mask)
 
             if self.inpaint_full_res:
-                np_mask = np.array(image_mask).astype(np.float32)
-                np_mask /= 255
-                np_mask = 1-pow(1-np_mask, 100)
-                np_mask *= 255
-                np_mask = np.clip(np_mask, 0, 255).astype(np.uint8)
-                self.mask_for_overlay = Image.fromarray(np_mask)
                 mask = image_mask.convert('L')
                 crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
                 crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
@@ -1431,13 +1496,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 self.paste_to = (x1, y1, x2-x1, y2-y1)
             else:
                 image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
-                np_mask = np.array(image_mask).astype(np.float32)
-                np_mask /= 255
-                np_mask = 1-pow(1-np_mask, 100)
-                np_mask *= 255
-                np_mask = np.clip(np_mask, 0, 255).astype(np.uint8)
-                self.mask_for_overlay = Image.fromarray(np_mask)
 
+            self.masks_for_overlay = []
             self.overlay_images = []
 
         latent_mask = self.latent_mask if self.latent_mask is not None else image_mask
@@ -1459,10 +1519,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image = images.resize_image(self.resize_mode, image, self.width, self.height)
 
             if image_mask is not None:
-                image_masked = Image.new('RGBa', (image.width, image.height))
-                image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
-
-                self.overlay_images.append(image_masked.convert('RGBA'))
+                self.overlay_images.append(image)
+                self.masks_for_overlay.append(image_mask)
 
             # crop_region is not None if we are doing inpaint full res
             if crop_region is not None:
@@ -1486,6 +1544,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             if self.overlay_images is not None:
                 self.overlay_images = self.overlay_images * self.batch_size
 
+            if self.masks_for_overlay is not None:
+                self.masks_for_overlay = self.masks_for_overlay * self.batch_size
+
             if self.color_corrections is not None and len(self.color_corrections) == 1:
                 self.color_corrections = self.color_corrections * self.batch_size
 
-- 
cgit v1.2.3


From d3fdc4af61b7560eede52290e1ede48185680089 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sun, 3 Dec 2023 18:22:00 +0900
Subject: rework mask and mask_composite logic

---
 modules/processing.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 4f265801..6f01c95f 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -938,21 +938,20 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 if opts.enable_pnginfo:
                     image.info["parameters"] = text
                 output_images.append(image)
-                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
-                    image_mask = p.mask_for_overlay.convert('RGB')
-                    image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
-
-                    if save_samples and opts.save_mask:
-                        images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
-
-                    if save_samples and opts.save_mask_composite:
-                        images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
-
-                    if opts.return_mask:
-                        output_images.append(image_mask)
-
-                    if opts.return_mask_composite:
-                        output_images.append(image_mask_composite)
+                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
+                    if opts.return_mask or opts.save_mask:
+                        image_mask = p.mask_for_overlay.convert('RGB')
+                        if save_samples and opts.save_mask:
+                            images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
+                        if opts.return_mask:
+                            output_images.append(image_mask)
+
+                    if opts.return_mask_composite or opts.save_mask_composite:
+                        image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
+                        if save_samples and opts.save_mask_composite:
+                            images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
+                        if opts.return_mask_composite:
+                            output_images.append(image_mask_composite)
 
             del x_samples_ddim
 
-- 
cgit v1.2.3


From 552f8bc832cd21ee0338e08b6a701687d0d79fad Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Sun, 3 Dec 2023 14:49:41 -0700
Subject: "Uncrop" the original denoised image for the composite step, fixing a
 "ValueError: Images do not match" *shudder*

---
 modules/processing.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 66aaab83..cd7216f8 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -994,6 +994,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 # we need to keep the original image around
                 # and use it in the composite step.
                 original_denoised_image = image.copy()
+
+                if p.paste_to is not None:
+                    original_denoised_image = uncrop(original_denoised_image, (p.overlay_images[i].width, p.overlay_images[i].height), p.paste_to)
+
                 image = apply_overlay(image, p.paste_to, i, p.overlay_images)
 
                 if save_samples:
-- 
cgit v1.2.3


From aaacf4823241450d88315af9d465d6815119fe0d Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 01:27:22 -0700
Subject: Organized the settings and UI of soft inpainting to allow for
 toggling the feature, and centralized default values to reduce the amount of
 copy-pasta.

---
 modules/processing.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index cd7216f8..b209c84a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -30,6 +30,7 @@ import modules.sd_models as sd_models
 import modules.sd_vae as sd_vae
 from ldm.data.util import AddMiDaS
 from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
+import modules.soft_inpainting as si
 
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
@@ -1425,9 +1426,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_x: int = 4
     mask_blur_y: int = 4
     mask_blur: int = None
-    mask_blend_power: float = 1
-    mask_blend_scale: float = 0.5
-    inpaint_detail_preservation: float = 4
+    soft_inpainting: si.SoftInpaintingParameters = si.default
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
-- 
cgit v1.2.3


From 259d33c3c8e27557cb9bab9b3a1dd7fc7450d16c Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 01:57:21 -0700
Subject: Enables the original functionality to be toggled on and off.

---
 modules/processing.py | 99 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 29 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b209c84a..b40b1a40 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -88,9 +88,12 @@ def apply_overlay(image, paste_loc, index, overlays):
 
     return image
 
-def create_binary_mask(image):
+def create_binary_mask(image, round=True):
     if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255):
-        image = image.split()[-1].convert("L")
+        if round:
+            image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
+        else:
+            image = image.split()[-1].convert("L")
     else:
         image = image.convert('L')
     return image
@@ -316,7 +319,7 @@ class StableDiffusionProcessing:
             c_adm = torch.cat((c_adm, noise_level_emb), 1)
         return c_adm
 
-    def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None):
+    def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
         self.is_using_inpainting_conditioning = True
 
         # Handle the different mask inputs
@@ -327,6 +330,11 @@ class StableDiffusionProcessing:
                 conditioning_mask = np.array(image_mask.convert("L"))
                 conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
                 conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
+
+                if round_image_mask:
+                    # Caller is requesting a discretized mask as input, so we round to either 1.0 or 0.0
+                    conditioning_mask = torch.round(conditioning_mask)
+
         else:
             conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:])
 
@@ -350,7 +358,7 @@ class StableDiffusionProcessing:
 
         return image_conditioning
 
-    def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
+    def img2img_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
         source_image = devices.cond_cast_float(source_image)
 
         # HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
@@ -362,7 +370,10 @@ class StableDiffusionProcessing:
             return self.edit_image_conditioning(source_image)
 
         if self.sampler.conditioning_key in {'hybrid', 'concat'}:
-            return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+            return self.inpainting_image_conditioning(source_image,
+                                                      latent_image,
+                                                      image_mask=image_mask,
+                                                      round_image_mask=round_image_mask)
 
         if self.sampler.conditioning_key == "crossattn-adm":
             return self.unclip_image_conditioning(source_image)
@@ -878,8 +889,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
+
                 # Generate the mask(s) based on similarity between the original and denoised latent vectors
-                if getattr(p, "image_mask", None) is not None:
+                if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
                     # latent_mask = p.nmask[0].float().cpu()
 
                     # convert the original mask into a form we use to scale distances for thresholding
@@ -911,7 +923,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                         converted_mask = converted_mask.astype(np.uint8)
                         converted_mask = Image.fromarray(converted_mask)
                         converted_mask = images.resize_image(2, converted_mask, p.width, p.height)
-                        converted_mask = create_binary_mask(converted_mask)
+                        converted_mask = create_binary_mask(converted_mask, round=False)
 
                         # Remove aliasing artifacts using a gaussian blur.
                         converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))
@@ -1010,23 +1022,33 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 if opts.enable_pnginfo:
                     image.info["parameters"] = text
                 output_images.append(image)
-                if save_samples and hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
-                    image_mask = p.masks_for_overlay[i].convert('RGB')
-                    image_mask_composite = Image.composite(
-                        original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size),
-                        images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA')
-
-                    if opts.save_mask:
-                        images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
-
-                    if opts.save_mask_composite:
-                        images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
-
-                    if opts.return_mask:
-                        output_images.append(image_mask)
-
-                    if opts.return_mask_composite:
-                        output_images.append(image_mask_composite)
+                if save_samples and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
+                    if hasattr(p, 'masks_for_overlay') and p.masks_for_overlay:
+                        image_mask = p.masks_for_overlay[i].convert('RGB')
+                        image_mask_composite = Image.composite(
+                            original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size),
+                            images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA')
+                    elif hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
+                        image_mask = p.mask_for_overlay.convert('RGB')
+                        image_mask_composite = Image.composite(
+                            original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size),
+                            images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
+                    else:
+                        image_mask = None
+                        image_mask_composite = None
+
+                    if image_mask is not None and image_mask_composite is not None:
+                        if opts.save_mask:
+                            images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
+
+                        if opts.save_mask_composite:
+                            images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
+
+                        if opts.return_mask:
+                            output_images.append(image_mask)
+
+                        if opts.return_mask_composite:
+                            output_images.append(image_mask_composite)
 
             del x_samples_ddim
 
@@ -1439,6 +1461,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     nmask: torch.Tensor = field(default=None, init=False)
     image_conditioning: torch.Tensor = field(default=None, init=False)
     init_img_hash: str = field(default=None, init=False)
+    mask_for_overlay: Image = field(default=None, init=False)
     init_latent: torch.Tensor = field(default=None, init=False)
 
     def __post_init__(self):
@@ -1471,7 +1494,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if image_mask is not None:
             # image_mask is passed in as RGBA by Gradio to support alpha masks,
             # but we still want to support binary masks.
-            image_mask = create_binary_mask(image_mask)
+            image_mask = create_binary_mask(image_mask, round=(self.soft_inpainting is None))
 
             if self.inpainting_mask_invert:
                 image_mask = ImageOps.invert(image_mask)
@@ -1489,6 +1512,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = Image.fromarray(np_mask)
 
             if self.inpaint_full_res:
+                self.mask_for_overlay = image_mask if self.soft_inpainting is None else None
                 mask = image_mask.convert('L')
                 crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
                 crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
@@ -1500,7 +1524,12 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             else:
                 image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
 
-            self.masks_for_overlay = []
+                if self.soft_inpainting is None:
+                    np_mask = np.array(image_mask)
+                    np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
+                    self.mask_for_overlay = Image.fromarray(np_mask)
+
+            self.masks_for_overlay = [] if self.soft_inpainting is not None else None
             self.overlay_images = []
 
         latent_mask = self.latent_mask if self.latent_mask is not None else image_mask
@@ -1522,8 +1551,15 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image = images.resize_image(self.resize_mode, image, self.width, self.height)
 
             if image_mask is not None:
-                self.overlay_images.append(image)
-                self.masks_for_overlay.append(image_mask)
+                if self.soft_inpainting is not None:
+                    # We apply the masks AFTER to adjust mask based on changed content.
+                    self.overlay_images.append(image)
+                    self.masks_for_overlay.append(image_mask)
+                else:
+                    image_masked = Image.new('RGBa', (image.width, image.height))
+                    image_masked.paste(image.convert("RGBA").convert("RGBa"),
+                                       mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
+                    self.overlay_images.append(image_masked.convert('RGBA'))
 
             # crop_region is not None if we are doing inpaint full res
             if crop_region is not None:
@@ -1576,6 +1612,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
             latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
             latmask = latmask[0]
+            if self.soft_inpainting is None:
+                latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
 
             self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype)
@@ -1587,7 +1625,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
-        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask)
+        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1,
+                                                                  self.init_latent,
+                                                                  image_mask,
+                                                                  self.soft_inpainting is None)
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         x = self.rng.next()
-- 
cgit v1.2.3


From 976c1053efeb5054692ed3cfa294cf79196f3946 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 16:06:58 -0700
Subject: Cleaned up code, moved main code contributions into
 soft_inpainting.py

---
 modules/processing.py | 56 +++++++--------------------------------------------
 1 file changed, 7 insertions(+), 49 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b40b1a40..0b360387 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -892,55 +892,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
                 # Generate the mask(s) based on similarity between the original and denoised latent vectors
                 if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    # latent_mask = p.nmask[0].float().cpu()
-
-                    # convert the original mask into a form we use to scale distances for thresholding
-                    # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2))
-                    # mask_scalar = mask_scalar / (1.00001-mask_scalar)
-                    # mask_scalar = mask_scalar.numpy()
-
-                    latent_orig = p.init_latent
-                    latent_proc = samples_ddim
-                    latent_distance = torch.norm(latent_proc - latent_orig, p=2, dim=1)
-
-                    kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2)
-
-                    for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, p.overlay_images)):
-                        converted_mask = distance_map.float().cpu().numpy()
-                        converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center,
-                                                       percentile_min=0.9, percentile_max=1, min_width=1)
-                        converted_mask = images.weighted_histogram_filter(converted_mask,  kernel, kernel_center,
-                                                       percentile_min=0.25, percentile_max=0.75, min_width=1)
-
-                        # The distance at which opacity of original decreases to 50%
-                        # half_weighted_distance = 1  # * mask_scalar
-                        # converted_mask = converted_mask / half_weighted_distance
-
-                        converted_mask = 1 / (1 + converted_mask ** 2)
-                        converted_mask = images.smootherstep(converted_mask)
-                        converted_mask = 1 - converted_mask
-                        converted_mask = 255. * converted_mask
-                        converted_mask = converted_mask.astype(np.uint8)
-                        converted_mask = Image.fromarray(converted_mask)
-                        converted_mask = images.resize_image(2, converted_mask, p.width, p.height)
-                        converted_mask = create_binary_mask(converted_mask, round=False)
-
-                        # Remove aliasing artifacts using a gaussian blur.
-                        converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))
-
-                        # Expand the mask to fit the whole image if needed.
-                        if p.paste_to is not None:
-                            converted_mask = uncrop(converted_mask,
-                                                    (overlay_image.width, overlay_image.height),
-                                                    p.paste_to)
-
-                        p.masks_for_overlay[i] = converted_mask
-
-                        image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height))
-                        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
-                                           mask=ImageOps.invert(converted_mask.convert('L')))
-
-                        p.overlay_images[i] = image_masked.convert('RGBA')
+                    si.generate_adaptive_masks(latent_orig=p.init_latent,
+                                               latent_processed=samples_ddim,
+                                               overlay_images=p.overlay_images,
+                                               masks_for_overlay=p.masks_for_overlay,
+                                               width=p.width,
+                                               height=p.height,
+                                               paste_to=p.paste_to)
 
                 x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim,
                                                      target_device=devices.cpu,
-- 
cgit v1.2.3


From 57f29bd61dc30f1a8c94ead9b780f4655f7d7d6d Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 17:41:18 -0700
Subject: Re-introduce latent blending step from the vanilla inpainting
 procedure.

---
 modules/processing.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 0b360387..c8dc4d93 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1597,6 +1597,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
+        if self.mask is not None and self.soft_inpainting is None:
+            samples = samples * self.nmask + self.init_latent * self.mask
+
         del x
         devices.torch_gc()
 
-- 
cgit v1.2.3


From 60c602232fd760fb548fb0b3d18b5297f8823c2a Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 17:41:51 -0700
Subject: Restored original formatting.

---
 modules/processing.py | 36 +++++++++++-------------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index c8dc4d93..90ae249a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -370,10 +370,7 @@ class StableDiffusionProcessing:
             return self.edit_image_conditioning(source_image)
 
         if self.sampler.conditioning_key in {'hybrid', 'concat'}:
-            return self.inpainting_image_conditioning(source_image,
-                                                      latent_image,
-                                                      image_mask=image_mask,
-                                                      round_image_mask=round_image_mask)
+            return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask, round_image_mask=round_image_mask)
 
         if self.sampler.conditioning_key == "crossattn-adm":
             return self.unclip_image_conditioning(source_image)
@@ -885,7 +882,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
 
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
-                # todo: generate masks the old fashioned way
+                # todo: generate adaptive masks based on pixel differences.
+                # if p.masks_for_overlay is used, it will already be populated with masks
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
@@ -900,9 +898,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                                                height=p.height,
                                                paste_to=p.paste_to)
 
-                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim,
-                                                     target_device=devices.cpu,
-                                                     check_for_nans=True)
+                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
@@ -927,9 +923,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 x_samples_ddim = batch_params.images
 
             def infotext(index=0, use_main_prompt=False):
-                return create_infotext(p, p.prompts, p.seeds, p.subseeds,
-                                       use_main_prompt=use_main_prompt, index=index,
-                                       all_negative_prompts=p.negative_prompts)
+                return create_infotext(p, p.prompts, p.seeds, p.subseeds, use_main_prompt=use_main_prompt, index=index, all_negative_prompts=p.negative_prompts)
 
             save_samples = p.save_samples()
 
@@ -972,8 +966,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 image = apply_overlay(image, p.paste_to, i, p.overlay_images)
 
                 if save_samples:
-                    images.save_image(image, p.outpath_samples, "", p.seeds[i],
-                                      p.prompts[i], opts.samples_format, info=infotext(i), p=p)
+                    images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p)
 
                 text = infotext(i)
                 infotexts.append(text)
@@ -983,14 +976,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 if save_samples and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
                     if hasattr(p, 'masks_for_overlay') and p.masks_for_overlay:
                         image_mask = p.masks_for_overlay[i].convert('RGB')
-                        image_mask_composite = Image.composite(
-                            original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size),
-                            images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA')
+                        image_mask_composite = Image.composite(original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA')
                     elif hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
                         image_mask = p.mask_for_overlay.convert('RGB')
-                        image_mask_composite = Image.composite(
-                            original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size),
-                            images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
+                        image_mask_composite = Image.composite(original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
                     else:
                         image_mask = None
                         image_mask_composite = None
@@ -1515,8 +1504,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                     self.masks_for_overlay.append(image_mask)
                 else:
                     image_masked = Image.new('RGBa', (image.width, image.height))
-                    image_masked.paste(image.convert("RGBA").convert("RGBa"),
-                                       mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
+                    image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
+
                     self.overlay_images.append(image_masked.convert('RGBA'))
 
             # crop_region is not None if we are doing inpaint full res
@@ -1583,10 +1572,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
-        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1,
-                                                                  self.init_latent,
-                                                                  image_mask,
-                                                                  self.soft_inpainting is None)
+        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.soft_inpainting is None)
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         x = self.rng.next()
-- 
cgit v1.2.3


From b32a334e3da7b06d82441beaa08a673b4f55bca1 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 17:57:10 -0700
Subject: Applies a convert('RGBA') operation early to mimic previous
 behaviour.

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 90ae249a..7fc282cf 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1500,7 +1500,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             if image_mask is not None:
                 if self.soft_inpainting is not None:
                     # We apply the masks AFTER to adjust mask based on changed content.
-                    self.overlay_images.append(image)
+                    self.overlay_images.append(image.convert('RGBA'))
                     self.masks_for_overlay.append(image_mask)
                 else:
                     image_masked = Image.new('RGBa', (image.width, image.height))
-- 
cgit v1.2.3


From 6fc12428e3c5f903584ca7986e0c441f80fa2807 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 19:42:59 -0700
Subject: Fixed issue where batched inpainting (batch size > 1) wouldn't work
 because of mismatched tensor sizes. The 'already_decoded' case should
 also be handled correctly (tested indirectly).

---
 modules/processing.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 7fc282cf..71bb056a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -883,20 +883,27 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
                 # todo: generate adaptive masks based on pixel differences.
-                # if p.masks_for_overlay is used, it will already be populated with masks
+                if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
+                    si.apply_masks(soft_inpainting=p.soft_inpainting,
+                                   nmask=p.nmask,
+                                   overlay_images=p.overlay_images,
+                                   masks_for_overlay=p.masks_for_overlay,
+                                   width=p.width,
+                                   height=p.height,
+                                   paste_to=p.paste_to)
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
 
                 # Generate the mask(s) based on similarity between the original and denoised latent vectors
                 if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    si.generate_adaptive_masks(latent_orig=p.init_latent,
-                                               latent_processed=samples_ddim,
-                                               overlay_images=p.overlay_images,
-                                               masks_for_overlay=p.masks_for_overlay,
-                                               width=p.width,
-                                               height=p.height,
-                                               paste_to=p.paste_to)
+                    si.apply_adaptive_masks(latent_orig=p.init_latent,
+                                            latent_processed=samples_ddim,
+                                            overlay_images=p.overlay_images,
+                                            masks_for_overlay=p.masks_for_overlay,
+                                            width=p.width,
+                                            height=p.height,
+                                            paste_to=p.paste_to)
 
                 x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
-- 
cgit v1.2.3


From 49bbf1140731036875573bb7c44aa7e74623c856 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Mon, 4 Dec 2023 19:47:40 -0700
Subject: Fixed unused import.

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 71bb056a..e1823ac3 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
 
 import torch
 import numpy as np
-from PIL import Image, ImageOps, ImageFilter
+from PIL import Image, ImageOps
 import random
 import cv2
 from skimage import exposure
-- 
cgit v1.2.3


From ac4578912395627731f2cd8529f87a95df1f7644 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Wed, 6 Dec 2023 21:16:27 -0700
Subject: Removed soft inpainting, added hooks for soft inpainting to work
 instead.

---
 modules/processing.py | 94 +++++++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 56 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 7d46949f..5a1a90af 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -30,7 +30,6 @@ import modules.sd_models as sd_models
 import modules.sd_vae as sd_vae
 from ldm.data.util import AddMiDaS
 from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
-import modules.soft_inpainting as si
 
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
@@ -73,12 +72,10 @@ def uncrop(image, dest_size, paste_loc):
     return image
 
 
-def apply_overlay(image, paste_loc, index, overlays):
-    if overlays is None or index >= len(overlays):
+def apply_overlay(image, paste_loc, overlay):
+    if overlay is None:
         return image
 
-    overlay = overlays[index]
-
     if paste_loc is not None:
         image = uncrop(image, (overlay.width, overlay.height), paste_loc)
 
@@ -150,7 +147,6 @@ class StableDiffusionProcessing:
     do_not_save_grid: bool = False
     extra_generation_params: dict[str, Any] = None
     overlay_images: list = None
-    masks_for_overlay: list = None
     eta: float = None
     do_not_reload_embeddings: bool = False
     denoising_strength: float = None
@@ -880,31 +876,17 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
+            if p.scripts is not None:
+                ps = scripts.PostSampleArgs(samples_ddim)
+                p.scripts.post_sample(p, ps)
+                samples_ddim = pp.samples
+
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
-                # todo: generate adaptive masks based on pixel differences.
-                if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    si.apply_masks(soft_inpainting=p.soft_inpainting,
-                                   nmask=p.nmask,
-                                   overlay_images=p.overlay_images,
-                                   masks_for_overlay=p.masks_for_overlay,
-                                   width=p.width,
-                                   height=p.height,
-                                   paste_to=p.paste_to)
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
 
-                # Generate the mask(s) based on similarity between the original and denoised latent vectors
-                if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None:
-                    si.apply_adaptive_masks(latent_orig=p.init_latent,
-                                            latent_processed=samples_ddim,
-                                            overlay_images=p.overlay_images,
-                                            masks_for_overlay=p.masks_for_overlay,
-                                            width=p.width,
-                                            height=p.height,
-                                            paste_to=p.paste_to)
-
                 x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -955,9 +937,18 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     pp = scripts.PostprocessImageArgs(image)
                     p.scripts.postprocess_image(p, pp)
                     image = pp.image
+
+                mask_for_overlay = p.mask_for_overlay
+                overlay_image = p.overlay_images[i] if p.overlay_images is not None and i < len(p.overlay_images) else None
+
+                if p.scripts is not None:
+                    ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image)
+                    p.scripts.postprocess_maskoverlay(p, ppmo)
+                    mask_for_overlay, overlay_image = pp.mask_for_overlay, pp.overlay_image
+
                 if p.color_corrections is not None and i < len(p.color_corrections):
                     if save_samples and opts.save_images_before_color_correction:
-                        image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images)
+                        image_without_cc = apply_overlay(image, p.paste_to, overlay_image)
                         images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-before-color-correction")
                     image = apply_color_correction(p.color_corrections[i], image)
 
@@ -968,9 +959,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 original_denoised_image = image.copy()
 
                 if p.paste_to is not None:
-                    original_denoised_image = uncrop(original_denoised_image, (p.overlay_images[i].width, p.overlay_images[i].height), p.paste_to)
+                    original_denoised_image = uncrop(original_denoised_image, (p.overlay_image.width, p.overlay_image.height), p.paste_to)
 
-                image = apply_overlay(image, p.paste_to, i, p.overlay_images)
+                image = apply_overlay(image, p.paste_to, overlay_image)
 
                 if save_samples:
                     images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p)
@@ -981,13 +972,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     image.info["parameters"] = text
                 output_images.append(image)
 
-                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay:
-                    mask_for_overlay = p.mask_for_overlay
-                elif hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and p.masks_for_overlay[i]:
-                    mask_for_overlay = p.masks_for_overlay[i]
-                else:
-                    mask_for_overlay = None
-
                 if mask_for_overlay is not None:
                     if opts.return_mask or opts.save_mask:
                         image_mask = mask_for_overlay.convert('RGB')
@@ -1401,7 +1385,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     mask_blur_x: int = 4
     mask_blur_y: int = 4
     mask_blur: int = None
-    soft_inpainting: si.SoftInpaintingParameters = si.default
+    mask_round: bool = True
     inpainting_fill: int = 0
     inpaint_full_res: bool = True
     inpaint_full_res_padding: int = 0
@@ -1447,7 +1431,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if image_mask is not None:
             # image_mask is passed in as RGBA by Gradio to support alpha masks,
             # but we still want to support binary masks.
-            image_mask = create_binary_mask(image_mask, round=(self.soft_inpainting is None))
+            image_mask = create_binary_mask(image_mask, round=self.mask_round)
 
             if self.inpainting_mask_invert:
                 image_mask = ImageOps.invert(image_mask)
@@ -1465,7 +1449,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image_mask = Image.fromarray(np_mask)
 
             if self.inpaint_full_res:
-                self.mask_for_overlay = image_mask if self.soft_inpainting is None else None
+                self.mask_for_overlay = image_mask
                 mask = image_mask.convert('L')
                 crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
                 crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
@@ -1476,13 +1460,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 self.paste_to = (x1, y1, x2-x1, y2-y1)
             else:
                 image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
+                np_mask = np.array(image_mask)
+                np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
+                self.mask_for_overlay = Image.fromarray(np_mask)
 
-                if self.soft_inpainting is None:
-                    np_mask = np.array(image_mask)
-                    np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
-                    self.mask_for_overlay = Image.fromarray(np_mask)
-
-            self.masks_for_overlay = [] if self.soft_inpainting is not None else None
             self.overlay_images = []
 
         latent_mask = self.latent_mask if self.latent_mask is not None else image_mask
@@ -1504,15 +1485,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
                 image = images.resize_image(self.resize_mode, image, self.width, self.height)
 
             if image_mask is not None:
-                if self.soft_inpainting is not None:
-                    # We apply the masks AFTER to adjust mask based on changed content.
-                    self.overlay_images.append(image.convert('RGBA'))
-                    self.masks_for_overlay.append(image_mask)
-                else:
-                    image_masked = Image.new('RGBa', (image.width, image.height))
-                    image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
+                image_masked = Image.new('RGBa', (image.width, image.height))
+                image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
 
-                    self.overlay_images.append(image_masked.convert('RGBA'))
+                self.overlay_images.append(image_masked.convert('RGBA'))
 
             # crop_region is not None if we are doing inpaint full res
             if crop_region is not None:
@@ -1565,7 +1541,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
             latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
             latmask = latmask[0]
-            if self.soft_inpainting is None:
+            if self.mask_round:
                 latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
 
@@ -1578,7 +1554,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             elif self.inpainting_fill == 3:
                 self.init_latent = self.init_latent * self.mask
 
-        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.soft_inpainting is None)
+        self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.mask_round)
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         x = self.rng.next()
@@ -1589,8 +1565,14 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
-        if self.mask is not None and self.soft_inpainting is None:
-            samples = samples * self.nmask + self.init_latent * self.mask
+        blended_samples = samples * self.nmask + self.init_latent * self.mask
+
+        if self.scripts is not None:
+            mba = scripts.MaskBlendArgs(self, samples, self.nmask, self.init_latent, self.mask, blended_samples, sigma=None, is_final_blend=True)
+            self.scripts.on_mask_blend(self, mba)
+            blended_samples = mba.blended_latent
+
+        samples = blended_samples
 
         del x
         devices.torch_gc()
-- 
cgit v1.2.3


From 2abc417834d752e43a283f8603bfddfb1c80b30f Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Wed, 6 Dec 2023 22:25:53 -0700
Subject: Re-implemented soft inpainting via a script. Also fixed some mistakes
 with the previous hooks, removed unnecessary formatting changes, removed code
 that I had forgotten to remove.

---
 modules/processing.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 5a1a90af..f8d85bdf 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -879,14 +879,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if p.scripts is not None:
                 ps = scripts.PostSampleArgs(samples_ddim)
                 p.scripts.post_sample(p, ps)
-                samples_ddim = pp.samples
+                samples_ddim = ps.samples
 
             if getattr(samples_ddim, 'already_decoded', False):
                 x_samples_ddim = samples_ddim
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-
                 x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -944,7 +943,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 if p.scripts is not None:
                     ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image)
                     p.scripts.postprocess_maskoverlay(p, ppmo)
-                    mask_for_overlay, overlay_image = pp.mask_for_overlay, pp.overlay_image
+                    mask_for_overlay, overlay_image = ppmo.mask_for_overlay, ppmo.overlay_image
 
                 if p.color_corrections is not None and i < len(p.color_corrections):
                     if save_samples and opts.save_images_before_color_correction:
@@ -959,7 +958,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 original_denoised_image = image.copy()
 
                 if p.paste_to is not None:
-                    original_denoised_image = uncrop(original_denoised_image, (p.overlay_image.width, p.overlay_image.height), p.paste_to)
+                    original_denoised_image = uncrop(original_denoised_image, (overlay_image.width, overlay_image.height), p.paste_to)
 
                 image = apply_overlay(image, p.paste_to, overlay_image)
 
@@ -1512,9 +1511,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             if self.overlay_images is not None:
                 self.overlay_images = self.overlay_images * self.batch_size
 
-            if self.masks_for_overlay is not None:
-                self.masks_for_overlay = self.masks_for_overlay * self.batch_size
-
             if self.color_corrections is not None and len(self.color_corrections) == 1:
                 self.color_corrections = self.color_corrections * self.batch_size
 
@@ -1565,14 +1561,15 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
 
         samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
 
-        blended_samples = samples * self.nmask + self.init_latent * self.mask
+        if self.mask is not None:
+            blended_samples = samples * self.nmask + self.init_latent * self.mask
 
-        if self.scripts is not None:
-            mba = scripts.MaskBlendArgs(self, samples, self.nmask, self.init_latent, self.mask, blended_samples, sigma=None, is_final_blend=True)
-            self.scripts.on_mask_blend(self, mba)
-            blended_samples = mba.blended_latent
+            if self.scripts is not None:
+                mba = scripts.MaskBlendArgs(samples, self.nmask, self.init_latent, self.mask, blended_samples)
+                self.scripts.on_mask_blend(self, mba)
+                blended_samples = mba.blended_latent
 
-        samples = blended_samples
+            samples = blended_samples
 
         del x
         devices.torch_gc()
-- 
cgit v1.2.3


From 0ef4a4cb2365051b1e308f0136a0d8c01d071569 Mon Sep 17 00:00:00 2001
From: CodeHatchling <steve@codehatch.com>
Date: Thu, 7 Dec 2023 14:54:26 -0700
Subject: Fixed error that occurs when using vanilla samplers (somehow).

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index f8d85bdf..bea01ec6 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -937,8 +937,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                     p.scripts.postprocess_image(p, pp)
                     image = pp.image
 
-                mask_for_overlay = p.mask_for_overlay
-                overlay_image = p.overlay_images[i] if p.overlay_images is not None and i < len(p.overlay_images) else None
+                mask_for_overlay = getattr(p, "mask_for_overlay", None)
+                overlay_image = p.overlay_images[i] if getattr(p, "overlay_images", None) is not None and i < len(p.overlay_images) else None
 
                 if p.scripts is not None:
                     ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image)
-- 
cgit v1.2.3


From 1242ba08e19f3d317bdc5924db2b73d0c9569a7f Mon Sep 17 00:00:00 2001
From: gayshub <zhangxianglei1234@gmail.com>
Date: Fri, 15 Dec 2023 16:57:17 +0800
Subject: allow specifying the task id and getting the location of a task in
 the queue of pending tasks

---
 modules/processing.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index e124e7f0..657cacfc 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1023,6 +1023,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
     hr_sampler_name: str = None
     hr_prompt: str = ''
     hr_negative_prompt: str = ''
+    force_task_id: str = None
 
     cached_hr_uc = [None, None]
     cached_hr_c = [None, None]
@@ -1358,6 +1359,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     inpainting_mask_invert: int = 0
     initial_noise_multiplier: float = None
     latent_mask: Image = None
+    force_task_id: string = None
 
     image_mask: Any = field(default=None, init=False)
 
-- 
cgit v1.2.3


From d859de37d9ec10cb6c804226328a11c87c444852 Mon Sep 17 00:00:00 2001
From: gayshub <zhangxianglei1234@gmail.com>
Date: Fri, 15 Dec 2023 17:48:20 +0800
Subject: fix the ruff problem reported on GitHub

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 657cacfc..5added65 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1359,7 +1359,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     inpainting_mask_invert: int = 0
     initial_noise_multiplier: float = None
     latent_mask: Image = None
-    force_task_id: string = None
+    force_task_id: str = None
 
     image_mask: Any = field(default=None, init=False)
 
-- 
cgit v1.2.3


From ea272152e0b50dbb2bd675ec020607f3d50c37d0 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 16 Dec 2023 15:08:08 +0800
Subject: Add FP8 settings into PNG info

---
 modules/processing.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index bea01ec6..179f2c0f 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -688,6 +688,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Size": f"{p.width}x{p.height}",
         "Model hash": p.sd_model_hash if opts.add_model_hash_to_info else None,
         "Model": p.sd_model_name if opts.add_model_name_to_info else None,
+        "FP8 weight": opts.fp8_storage if devices.fp8 else None,
+        "Cache FP16 weight for LoRA": opts.cache_fp16_weight if devices.fp8 else None,
         "VAE hash": p.sd_vae_hash if opts.add_vae_hash_to_info else None,
         "VAE": p.sd_vae_name if opts.add_vae_name_to_info else None,
         "Variation seed": (None if p.subseed_strength == 0 else (p.all_subseeds[0] if use_main_prompt else all_subseeds[index])),
-- 
cgit v1.2.3


From a97832033427096072d5ea914adac3662cda4fd1 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 16 Dec 2023 19:39:43 +0800
Subject: Let fp8-related settings invalidate cond_cache

---
 modules/processing.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index dd97b4ee..9351e3fb 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -431,6 +431,8 @@ class StableDiffusionProcessing:
             opts.sdxl_crop_top,
             self.width,
             self.height,
+            opts.fp8_storage,
+            opts.cache_fp16_weight,
         )
 
     def get_conds_with_caching(self, function, required_prompts, steps, caches, extra_network_data, hires_steps=None):
-- 
cgit v1.2.3


From 9feb034e343d6d7ef63395821658fb3774b30a24 Mon Sep 17 00:00:00 2001
From: wangqyqq <wangqyqq@163.com>
Date: Thu, 21 Dec 2023 20:15:51 +0800
Subject: support for sdxl-inpaint model

---
 modules/processing.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 6f01c95f..159548db 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -106,6 +106,20 @@ def txt2img_image_conditioning(sd_model, x, width, height):
         return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device)
 
     else:
+        sd = sd_model.model.state_dict()
+        diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
+        if diffusion_model_input.shape[1] == 9:
+            # The "masked-image" in this case will just be all 0.5 since the entire image is masked.
+            image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5
+            image_conditioning = images_tensor_to_samples(image_conditioning,
+                                                          approximation_indexes.get(opts.sd_vae_encode_method))
+
+            # Add the fake full 1s mask to the first dimension.
+            image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+            image_conditioning = image_conditioning.to(x.dtype)
+
+            return image_conditioning
+
         # Dummy zero conditioning if we're not using inpainting or unclip models.
         # Still takes up a bit of memory, but no encoder call.
         # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
@@ -362,6 +376,11 @@ class StableDiffusionProcessing:
         if self.sampler.conditioning_key == "crossattn-adm":
             return self.unclip_image_conditioning(source_image)
 
+        sd = self.sampler.model_wrap.inner_model.model.state_dict()
+        diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
+        if diffusion_model_input.shape[1] == 9:
+            return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+
         # Dummy zero conditioning if we're not using inpainting or depth model.
         return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
 
-- 
cgit v1.2.3


From bfe418a58d39c69ca2672e7d8a1fd7ad2b34869b Mon Sep 17 00:00:00 2001
From: wangqyqq <wangqyqq@163.com>
Date: Wed, 27 Dec 2023 10:20:56 +0800
Subject: add some code for robustness

---
 modules/processing.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 159548db..c05e608a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -108,17 +108,18 @@ def txt2img_image_conditioning(sd_model, x, width, height):
     else:
         sd = sd_model.model.state_dict()
         diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
-        if diffusion_model_input.shape[1] == 9:
-            # The "masked-image" in this case will just be all 0.5 since the entire image is masked.
-            image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5
-            image_conditioning = images_tensor_to_samples(image_conditioning,
-                                                          approximation_indexes.get(opts.sd_vae_encode_method))
+        if diffusion_model_input is not None:
+            if diffusion_model_input.shape[1] == 9:
+                # The "masked-image" in this case will just be all 0.5 since the entire image is masked.
+                image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5
+                image_conditioning = images_tensor_to_samples(image_conditioning,
+                                                              approximation_indexes.get(opts.sd_vae_encode_method))
 
-            # Add the fake full 1s mask to the first dimension.
-            image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
-            image_conditioning = image_conditioning.to(x.dtype)
+                # Add the fake full 1s mask to the first dimension.
+                image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+                image_conditioning = image_conditioning.to(x.dtype)
 
-            return image_conditioning
+                return image_conditioning
 
         # Dummy zero conditioning if we're not using inpainting or unclip models.
         # Still takes up a bit of memory, but no encoder call.
@@ -378,8 +379,9 @@ class StableDiffusionProcessing:
 
         sd = self.sampler.model_wrap.inner_model.model.state_dict()
         diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
-        if diffusion_model_input.shape[1] == 9:
-            return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+        if diffusion_model_input is not None:
+            if diffusion_model_input.shape[1] == 9:
+                return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
 
         # Dummy zero conditioning if we're not using inpainting or depth model.
         return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
-- 
cgit v1.2.3


From dc57ec0296e768ee91290e16ab262404837c566d Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Fri, 29 Dec 2023 01:56:48 +0900
Subject: save info of init image

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 9351e3fb..141f2f11 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1482,7 +1482,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             # Save init image
             if opts.save_init_img:
                 self.init_img_hash = hashlib.md5(img.tobytes()).hexdigest()
-                images.save_image(img, path=opts.outdir_init_images, basename=None, forced_filename=self.init_img_hash, save_to_dirs=False)
+                images.save_image(img, path=opts.outdir_init_images, basename=None, forced_filename=self.init_img_hash, save_to_dirs=False, existing_info=img.info)
 
             image = images.flatten(img, opts.img2img_background_color)
 
-- 
cgit v1.2.3


From bb07cb6a0df60a96827125ffc09ea182a1ed272c Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 17 Dec 2023 10:22:03 +0300
Subject: a

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 9351e3fb..ee2ccf46 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1135,7 +1135,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
     def init(self, all_prompts, all_seeds, all_subseeds):
         if self.enable_hr:
-            if self.hr_checkpoint_name:
+            if self.hr_checkpoint_name and self.hr_checkpoint_name != 'Use same checkpoint':
                 self.hr_checkpoint_info = sd_models.get_closet_checkpoint_match(self.hr_checkpoint_name)
 
                 if self.hr_checkpoint_info is None:
-- 
cgit v1.2.3


From d859cec696a953dbfd6f69f7735e68661748d579 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 1 Jan 2024 13:53:12 +0300
Subject: infotext.py: rename usages in the codebase

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 7789f9a4..b30df60d 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -16,7 +16,7 @@ from skimage import exposure
 from typing import Any
 
 import modules.sd_hijack
-from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors, rng
+from modules import devices, prompt_parser, masking, sd_samplers, lowvram, infotext, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors, rng
 from modules.rng import slerp # noqa: F401
 from modules.sd_hijack import model_hijack
 from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes
@@ -733,7 +733,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "User": p.user if opts.add_user_name_to_info else None,
     }
 
-    generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
+    generation_params_text = ", ".join([k if k == v else f'{k}: {infotext.quote(v)}' for k, v in generation_params.items() if v is not None])
 
     prompt_text = p.main_prompt if use_main_prompt else all_prompts[index]
     negative_prompt_text = f"\nNegative prompt: {p.main_negative_prompt if use_main_prompt else all_negative_prompts[index]}" if all_negative_prompts[index] else ""
-- 
cgit v1.2.3


From ac0ecf3b4b9d147743c04f0ff4ddc4cf4595e11d Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 1 Jan 2024 16:28:58 +0300
Subject: option to convert VAE to bfloat16 (implementation of #9295)

---
 modules/processing.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 846e4796..f0656882 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -628,20 +628,33 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
         sample = decode_first_stage(model, batch[i:i + 1])[0]
 
         if check_for_nans:
+
             try:
                 devices.test_for_nans(sample, "vae")
             except devices.NansException as e:
-                if devices.dtype_vae == torch.float32 or not shared.opts.auto_vae_precision:
+                if shared.opts.auto_vae_precision_bfloat16:
+                    autofix_dtype = torch.bfloat16
+                    autofix_dtype_text = "bfloat16"
+                    autofix_dtype_setting = "Automatically convert VAE to bfloat16"
+                    autofix_dtype_comment = ""
+                elif shared.opts.auto_vae_precision:
+                    autofix_dtype = torch.float32
+                    autofix_dtype_text = "32-bit float"
+                    autofix_dtype_setting = "Automatically revert VAE to 32-bit floats"
+                    autofix_dtype_comment = "\nTo always start with 32-bit VAE, use --no-half-vae commandline flag."
+                else:
+                    raise e
+
+                if devices.dtype_vae == autofix_dtype:
                     raise e
 
                 errors.print_error_explanation(
                     "A tensor with all NaNs was produced in VAE.\n"
-                    "Web UI will now convert VAE into 32-bit float and retry.\n"
-                    "To disable this behavior, disable the 'Automatically revert VAE to 32-bit floats' setting.\n"
-                    "To always start with 32-bit VAE, use --no-half-vae commandline flag."
+                    f"Web UI will now convert VAE into {autofix_dtype_text} and retry.\n"
+                    f"To disable this behavior, disable the '{autofix_dtype_setting}' setting.{autofix_dtype_comment}"
                 )
 
-                devices.dtype_vae = torch.float32
+                devices.dtype_vae = autofix_dtype
                 model.first_stage_model.to(devices.dtype_vae)
                 batch = batch.to(devices.dtype_vae)
 
-- 
cgit v1.2.3