From 3bca90b249d749ed5429f76e380d2ffa52fc0d41 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 13:48:27 +0300
Subject: hires fix checkpoint selection

---
 modules/processing.py | 47 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b0992ee1..7026487a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -935,7 +935,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
     cached_hr_uc = [None, None]
     cached_hr_c = [None, None]
 
-    def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, hr_sampler_name: str = None, hr_prompt: str = '', hr_negative_prompt: str = '', **kwargs):
+    def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, hr_checkpoint_name: str = None, hr_sampler_name: str = None, hr_prompt: str = '', hr_negative_prompt: str = '', **kwargs):
         super().__init__(**kwargs)
         self.enable_hr = enable_hr
         self.denoising_strength = denoising_strength
@@ -946,11 +946,14 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         self.hr_resize_y = hr_resize_y
         self.hr_upscale_to_x = hr_resize_x
         self.hr_upscale_to_y = hr_resize_y
+        self.hr_checkpoint_name = hr_checkpoint_name
+        self.hr_checkpoint_info = None
         self.hr_sampler_name = hr_sampler_name
         self.hr_prompt = hr_prompt
         self.hr_negative_prompt = hr_negative_prompt
         self.all_hr_prompts = None
         self.all_hr_negative_prompts = None
+        self.latent_scale_mode = None
 
         if firstphase_width != 0 or firstphase_height != 0:
             self.hr_upscale_to_x = self.width
@@ -973,6 +976,14 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
     def init(self, all_prompts, all_seeds, all_subseeds):
         if self.enable_hr:
+            if self.hr_checkpoint_name:
+                self.hr_checkpoint_info = sd_models.get_closet_checkpoint_match(self.hr_checkpoint_name)
+
+                if self.hr_checkpoint_info is None:
+                    raise Exception(f'Could not find checkpoint with name {self.hr_checkpoint_name}')
+
+                self.extra_generation_params["Hires checkpoint"] = self.hr_checkpoint_info.short_title
+
             if self.hr_sampler_name is not None and self.hr_sampler_name != self.sampler_name:
                 self.extra_generation_params["Hires sampler"] = self.hr_sampler_name
 
@@ -982,6 +993,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             if tuple(self.hr_negative_prompt) != tuple(self.negative_prompt):
                 self.extra_generation_params["Hires negative prompt"] = self.hr_negative_prompt
 
+            self.latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "nearest")
+            if self.enable_hr and self.latent_scale_mode is None:
+                if not any(x.name == self.hr_upscaler for x in shared.sd_upscalers):
+                    raise Exception(f"could not find upscaler named {self.hr_upscaler}")
+
             if opts.use_old_hires_fix_width_height and self.applied_old_hires_behavior_to != (self.width, self.height):
                 self.hr_resize_x = self.width
                 self.hr_resize_y = self.height
@@ -1020,14 +1036,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
                     self.truncate_x = (self.hr_upscale_to_x - target_w) // opt_f
                     self.truncate_y = (self.hr_upscale_to_y - target_h) // opt_f
 
-            # special case: the user has chosen to do nothing
-            if self.hr_upscale_to_x == self.width and self.hr_upscale_to_y == self.height:
-                self.enable_hr = False
-                self.denoising_strength = None
-                self.extra_generation_params.pop("Hires upscale", None)
-                self.extra_generation_params.pop("Hires resize", None)
-                return
-
             if not state.processing_has_refined_job_count:
                 if state.job_count == -1:
                     state.job_count = self.n_iter
@@ -1045,17 +1053,22 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
         self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
 
-        latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "nearest")
-        if self.enable_hr and latent_scale_mode is None:
-            if not any(x.name == self.hr_upscaler for x in shared.sd_upscalers):
-                raise Exception(f"could not find upscaler named {self.hr_upscaler}")
-
         x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
         samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
 
         if not self.enable_hr:
             return samples
 
+        current = shared.sd_model.sd_checkpoint_info
+        try:
+            if self.hr_checkpoint_info is not None:
+                sd_models.reload_model_weights(info=self.hr_checkpoint_info)
+
+            return self.sample_hr_pass(samples, seeds, subseeds, subseed_strength, prompts)
+        finally:
+            sd_models.reload_model_weights(info=current)
+
+    def sample_hr_pass(self, samples, seeds, subseeds, subseed_strength, prompts):
         self.is_hr_pass = True
 
         target_width = self.hr_upscale_to_x
@@ -1073,11 +1086,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             info = create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index)
             images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, p=self, suffix="-before-highres-fix")
 
-        if latent_scale_mode is not None:
+        if self.latent_scale_mode is not None:
             for i in range(samples.shape[0]):
                 save_intermediate(samples, i)
 
-            samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=latent_scale_mode["mode"], antialias=latent_scale_mode["antialias"])
+            samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=self.latent_scale_mode["mode"], antialias=self.latent_scale_mode["antialias"])
 
             # Avoid making the inpainting conditioning unless necessary as
             # this does need some extra compute to decode / encode the image again.
@@ -1193,7 +1206,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         self.hr_uc = None
         self.hr_c = None
 
-        if self.enable_hr:
+        if self.enable_hr and self.hr_checkpoint_info is None:
             if shared.opts.hires_fix_use_firstpass_conds:
                 self.calculate_hr_conds()
 
-- 
cgit v1.2.3


From 40cd59207b96f9e522fdc104b43279880b671ce4 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 14:10:26 +0300
Subject: make it work with SDXL

---
 modules/processing.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 7026487a..b8af1301 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1197,8 +1197,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         if self.hr_c is not None:
             return
 
-        self.hr_uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, self.hr_negative_prompts, self.steps * self.step_multiplier, [self.cached_hr_uc, self.cached_uc], self.hr_extra_network_data)
-        self.hr_c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, self.hr_prompts, self.steps * self.step_multiplier, [self.cached_hr_c, self.cached_c], self.hr_extra_network_data)
+        hr_prompts = prompt_parser.SdConditioning(self.hr_prompts, width=self.hr_upscale_to_x, height=self.hr_upscale_to_y)
+        hr_negative_prompts = prompt_parser.SdConditioning(self.hr_negative_prompts, width=self.hr_upscale_to_x, height=self.hr_upscale_to_y, is_negative_prompt=True)
+
+        self.hr_uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, hr_negative_prompts, self.steps * self.step_multiplier, [self.cached_hr_uc, self.cached_uc], self.hr_extra_network_data)
+        self.hr_c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, hr_prompts, self.steps * self.step_multiplier, [self.cached_hr_c, self.cached_c], self.hr_extra_network_data)
 
     def setup_conds(self):
         super().setup_conds()
-- 
cgit v1.2.3


From 77761e7bad8a7cbffc9028dc0b2f63169aaf25f9 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 14:10:33 +0300
Subject: linter

---
 modules/processing.py | 2 +-
 modules/ui.py         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b8af1301..21dbef16 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1055,6 +1055,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
         samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+        del x
 
         if not self.enable_hr:
             return samples
@@ -1137,7 +1138,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, p=self)
 
         # GC now before running the next img2img to prevent running out of memory
-        x = None
         devices.torch_gc()
 
         if not self.disable_extra_networks:
diff --git a/modules/ui.py b/modules/ui.py
index 6d8265f2..6fc9de83 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -476,9 +476,9 @@ def create_ui():
                                 hr_resize_y = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize height to", value=0, elem_id="txt2img_hr_resize_y")
 
                             with FormRow(elem_id="txt2img_hires_fix_row3", variant="compact", visible=opts.hires_fix_show_sampler) as hr_sampler_container:
-                                checkpoint_choices = lambda: ["Use same checkpoint"] + modules.sd_models.checkpoint_tiles(use_short=True)
-                                hr_checkpoint_name = gr.Dropdown(label='Hires checkpoint', elem_id="hr_checkpoint", choices=checkpoint_choices(), value="Use same checkpoint")
-                                create_refresh_button(hr_checkpoint_name, modules.sd_models.list_models, lambda: {"choices": checkpoint_choices()}, "hr_checkpoint_refresh")
+
+                                hr_checkpoint_name = gr.Dropdown(label='Hires checkpoint', elem_id="hr_checkpoint", choices=["Use same checkpoint"] + modules.sd_models.checkpoint_tiles(use_short=True), value="Use same checkpoint")
+                                create_refresh_button(hr_checkpoint_name, modules.sd_models.list_models, lambda: {"choices": ["Use same checkpoint"] + modules.sd_models.checkpoint_tiles(use_short=True)}, "hr_checkpoint_refresh")
 
                                 hr_sampler_index = gr.Dropdown(label='Hires sampling method', elem_id="hr_sampler", choices=["Use same sampler"] + [x.name for x in samplers_for_img2img], value="Use same sampler", type="index")
 
-- 
cgit v1.2.3


From eec540b22798ddcf8a03d947519c36635d77d722 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 15:04:12 +0300
Subject: repair non-latent upscaling broken for SDXL

---
 modules/processing.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 21dbef16..6fb14516 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1119,6 +1119,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             decoded_samples = torch.from_numpy(np.array(batch_images))
             decoded_samples = decoded_samples.to(shared.device)
             decoded_samples = 2. * decoded_samples - 1.
+            decoded_samples = decoded_samples.to(shared.device, dtype=devices.dtype_vae)
 
             samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples))
 
-- 
cgit v1.2.3


From a64fbe89288802f8b5ec8ca7bcab5aaf2c7bfea5 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 15:12:09 +0300
Subject: make it possible to use checkpoints of different types (SD1, SDXL) in
 first and second pass of hires fix

---
 modules/processing.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 6fb14516..c4da208f 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1060,16 +1060,21 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         if not self.enable_hr:
             return samples
 
+        if self.latent_scale_mode is None:
+            decoded_samples = decode_first_stage(self.sd_model, samples)
+        else:
+            decoded_samples = None
+
         current = shared.sd_model.sd_checkpoint_info
         try:
             if self.hr_checkpoint_info is not None:
                 sd_models.reload_model_weights(info=self.hr_checkpoint_info)
 
-            return self.sample_hr_pass(samples, seeds, subseeds, subseed_strength, prompts)
+            return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
         finally:
             sd_models.reload_model_weights(info=current)
 
-    def sample_hr_pass(self, samples, seeds, subseeds, subseed_strength, prompts):
+    def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts):
         self.is_hr_pass = True
 
         target_width = self.hr_upscale_to_x
@@ -1100,7 +1105,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             else:
                 image_conditioning = self.txt2img_image_conditioning(samples)
         else:
-            decoded_samples = decode_first_stage(self.sd_model, samples)
             lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
 
             batch_images = []
-- 
cgit v1.2.3


From cc53db6652b11e6f7bca42c3aa93bd6761ed3d3f Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 15:30:33 +0300
Subject: this time for sure

---
 modules/processing.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index c4da208f..3190b964 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -538,8 +538,12 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     return x
 
 
+class DecodedSamples(list):
+    already_decoded = True
+
+
 def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
-    samples = []
+    samples = DecodedSamples()
 
     for i in range(batch.shape[0]):
         sample = decode_first_stage(model, batch[i:i + 1])[0]
@@ -793,7 +797,11 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
-            x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
+            if getattr(samples_ddim, 'already_decoded', False):
+                x_samples_ddim = samples_ddim
+            else:
+                x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
+
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
 
@@ -1161,9 +1169,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
 
+        decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)
+
         self.is_hr_pass = False
 
-        return samples
+        return decoded_samples
 
     def close(self):
         super().close()
-- 
cgit v1.2.3


From 02038036ff571e0f04a94c3e279609666e239dec Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 16:16:31 +0300
Subject: make it so that VAE NaNs autodetection also works during first pass
 of hires fix

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 3190b964..0677de81 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1069,7 +1069,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples
 
         if self.latent_scale_mode is None:
-            decoded_samples = decode_first_stage(self.sd_model, samples)
+            decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True))
         else:
             decoded_samples = None
 
-- 
cgit v1.2.3


From 0af4127fd14360ebb12c6569d98aebf8047abbfc Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 30 Jul 2023 19:36:24 +0300
Subject: delete the field that is preventing the model from being unloaded and
 is causing increased RAM usage

---
 modules/processing.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 0677de81..b09433b0 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1076,11 +1076,15 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         current = shared.sd_model.sd_checkpoint_info
         try:
             if self.hr_checkpoint_info is not None:
+                del self.sampler
                 sd_models.reload_model_weights(info=self.hr_checkpoint_info)
+                devices.torch_gc()
 
             return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
         finally:
+            del self.sampler
             sd_models.reload_model_weights(info=current)
+            devices.torch_gc()
 
     def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts):
         self.is_hr_pass = True
-- 
cgit v1.2.3


From dca121e9035ba36b3f7484c8a31a7776d85c0960 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 31 Jul 2023 09:13:07 +0300
Subject: set the field to None instead

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b09433b0..35e7b87e 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1076,13 +1076,13 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         current = shared.sd_model.sd_checkpoint_info
         try:
             if self.hr_checkpoint_info is not None:
-                del self.sampler
+                self.sampler = None
                 sd_models.reload_model_weights(info=self.hr_checkpoint_info)
                 devices.torch_gc()
 
             return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
         finally:
-            del self.sampler
+            self.sampler = None
             sd_models.reload_model_weights(info=current)
             devices.torch_gc()
 
-- 
cgit v1.2.3


From 29d7e31d89e9d686784eacbdbfc5b15959eb4449 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 31 Jul 2023 10:43:26 +0300
Subject: repair AttributeError: 'NoneType' object has no attribute
 'conditioning_key'

---
 modules/processing.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 35e7b87e..1f0c0b3b 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1104,6 +1104,13 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             info = create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index)
             images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, p=self, suffix="-before-highres-fix")
 
+        img2img_sampler_name = self.hr_sampler_name or self.sampler_name
+
+        if self.sampler_name in ['PLMS', 'UniPC']:  # PLMS/UniPC do not support img2img so we just silently switch to DDIM
+            img2img_sampler_name = 'DDIM'
+
+        self.sampler = sd_samplers.create_sampler(img2img_sampler_name, self.sd_model)
+
         if self.latent_scale_mode is not None:
             for i in range(samples.shape[0]):
                 save_intermediate(samples, i)
@@ -1143,13 +1150,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         shared.state.nextjob()
 
-        img2img_sampler_name = self.hr_sampler_name or self.sampler_name
-
-        if self.sampler_name in ['PLMS', 'UniPC']:  # PLMS/UniPC do not support img2img so we just silently switch to DDIM
-            img2img_sampler_name = 'DDIM'
-
-        self.sampler = sd_samplers.create_sampler(img2img_sampler_name, self.sd_model)
-
         samples = samples[:, :, self.truncate_y//2:samples.shape[2]-(self.truncate_y+1)//2, self.truncate_x//2:samples.shape[3]-(self.truncate_x+1)//2]
 
         noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, p=self)
-- 
cgit v1.2.3


From c09bc2c60856ca1ab2243386176badf909affdbe Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Mon, 31 Jul 2023 13:20:26 +0300
Subject: fix "clamp_scalar_cpu" not implemented for 'Half'

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 1f0c0b3b..f8f8bddc 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1069,7 +1069,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples
 
         if self.latent_scale_mode is None:
-            decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True))
+            decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
         else:
             decoded_samples = None
 
-- 
cgit v1.2.3


From ccb92339348f6973de39cde062982a51a4cd0818 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Wed, 2 Aug 2023 18:53:09 +0300
Subject: add yet another torch_gc to reclaim some of VRAM after the initial
 stage of img2img

---
 modules/processing.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index b0992ee1..0b66cd2a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1348,6 +1348,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         image = image.to(shared.device, dtype=devices.dtype_vae)
 
         self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
+        devices.torch_gc()
 
         if self.resize_mode == 3:
             self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
-- 
cgit v1.2.3


From 84b6fcd02ca6d6ab48c4b6be4bb8724b1c2e7014 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Thu, 3 Aug 2023 00:00:23 +0300
Subject: add NV option for Random number generator source setting, which
 allows to generate same pictures on CPU/AMD/Mac as on NVidia  videocards.

---
 modules/devices.py                |  39 ++++++++++++++-
 modules/processing.py             |   6 +--
 modules/rng_philox.py             | 100 ++++++++++++++++++++++++++++++++++++++
 modules/sd_samplers_kdiffusion.py |   5 +-
 modules/shared.py                 |   2 +-
 5 files changed, 142 insertions(+), 10 deletions(-)
 create mode 100644 modules/rng_philox.py

(limited to 'modules/processing.py')

diff --git a/modules/devices.py b/modules/devices.py
index 57e51da3..b58776d8 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -3,7 +3,7 @@ import contextlib
 from functools import lru_cache
 
 import torch
-from modules import errors
+from modules import errors, rng_philox
 
 if sys.platform == "darwin":
     from modules import mac_specific
@@ -90,23 +90,58 @@ def cond_cast_float(input):
     return input.float() if unet_needs_upcast else input
 
 
+nv_rng = None
+
+
 def randn(seed, shape):
     from modules.shared import opts
 
-    torch.manual_seed(seed)
+    manual_seed(seed)
+
+    if opts.randn_source == "NV":
+        return torch.asarray(nv_rng.randn(shape), device=device)
+
     if opts.randn_source == "CPU" or device.type == 'mps':
         return torch.randn(shape, device=cpu).to(device)
+
     return torch.randn(shape, device=device)
 
 
+def randn_like(x):
+    from modules.shared import opts
+
+    if opts.randn_source == "NV":
+        return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)
+
+    if opts.randn_source == "CPU" or x.device.type == 'mps':
+        return torch.randn_like(x, device=cpu).to(x.device)
+
+    return torch.randn_like(x)
+
+
 def randn_without_seed(shape):
     from modules.shared import opts
 
+    if opts.randn_source == "NV":
+        return torch.asarray(nv_rng.randn(shape), device=device)
+
     if opts.randn_source == "CPU" or device.type == 'mps':
         return torch.randn(shape, device=cpu).to(device)
+
     return torch.randn(shape, device=device)
 
 
+def manual_seed(seed):
+    from modules.shared import opts
+
+    if opts.randn_source == "NV":
+        global nv_rng
+        nv_rng = rng_philox.Generator(seed)
+        return
+
+    torch.manual_seed(seed)
+
+
 def autocast(disable=False):
     from modules import shared
 
diff --git a/modules/processing.py b/modules/processing.py
index 0b66cd2a..8f34c8b4 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -492,7 +492,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
         noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
 
         subnoise = None
-        if subseeds is not None:
+        if subseeds is not None and subseed_strength != 0:
             subseed = 0 if i >= len(subseeds) else subseeds[i]
 
             subnoise = devices.randn(subseed, noise_shape)
@@ -524,7 +524,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
             cnt = p.sampler.number_of_needed_noises(p)
 
             if eta_noise_seed_delta > 0:
-                torch.manual_seed(seed + eta_noise_seed_delta)
+                devices.manual_seed(seed + eta_noise_seed_delta)
 
             for j in range(cnt):
                 sampler_noises[j].append(devices.randn_without_seed(tuple(noise_shape)))
@@ -636,7 +636,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio,
         "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr,
         "Init image hash": getattr(p, 'init_img_hash', None),
-        "RNG": opts.randn_source if opts.randn_source != "GPU" else None,
+        "RNG": opts.randn_source if opts.randn_source != "GPU" and opts.randn_source != "NV" else None,
         "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond,
         **p.extra_generation_params,
         "Version": program_version() if opts.add_version_to_infotext else None,
diff --git a/modules/rng_philox.py b/modules/rng_philox.py
new file mode 100644
index 00000000..b5c02483
--- /dev/null
+++ b/modules/rng_philox.py
@@ -0,0 +1,100 @@
+"""RNG imitating torch cuda randn on CPU. You are welcome.
+
+Usage:
+
+```
+g = Generator(seed=0)
+print(g.randn(shape=(3, 4)))
+```
+
+Expected output:
+```
+[[-0.92466259 -0.42534415 -2.6438457   0.14518388]
+ [-0.12086647 -0.57972564 -0.62285122 -0.32838709]
+ [-1.07454231 -0.36314407 -1.67105067  2.26550497]]
+```
+"""
+
+import numpy as np
+
+philox_m = [0xD2511F53, 0xCD9E8D57]
+philox_w = [0x9E3779B9, 0xBB67AE85]
+
+two_pow32_inv = np.array([2.3283064e-10], dtype=np.float32)
+two_pow32_inv_2pi = np.array([2.3283064e-10 * 6.2831855], dtype=np.float32)
+
+
+def uint32(x):
+    """Converts (N,) np.uint64 array into (2, N) np.uint32 array."""
+    return np.moveaxis(x.view(np.uint32).reshape(-1, 2), 0, 1)
+
+
+def philox4_round(counter, key):
+    """A single round of the Philox 4x32 random number generator."""
+
+    v1 = uint32(counter[0].astype(np.uint64) * philox_m[0])
+    v2 = uint32(counter[2].astype(np.uint64) * philox_m[1])
+
+    counter[0] = v2[1] ^ counter[1] ^ key[0]
+    counter[1] = v2[0]
+    counter[2] = v1[1] ^ counter[3] ^ key[1]
+    counter[3] = v1[0]
+
+
+def philox4_32(counter, key, rounds=10):
+    """Generates 32-bit random numbers using the Philox 4x32 random number generator.
+
+    Parameters:
+        counter (numpy.ndarray): A 4xN array of 32-bit integers representing the counter values (offset into generation).
+        key (numpy.ndarray): A 2xN array of 32-bit integers representing the key values (seed).
+        rounds (int): The number of rounds to perform.
+
+    Returns:
+        numpy.ndarray: A 4xN array of 32-bit integers containing the generated random numbers.
+    """
+
+    for _ in range(rounds - 1):
+        philox4_round(counter, key)
+
+        key[0] = key[0] + philox_w[0]
+        key[1] = key[1] + philox_w[1]
+
+    philox4_round(counter, key)
+    return counter
+
+
+def box_muller(x, y):
+    """Returns just the first out of two numbers generated by Box–Muller transform algorithm."""
+    u = x.astype(np.float32) * two_pow32_inv + two_pow32_inv / 2
+    v = y.astype(np.float32) * two_pow32_inv_2pi + two_pow32_inv_2pi / 2
+
+    s = np.sqrt(-2.0 * np.log(u))
+
+    r1 = s * np.sin(v)
+    return r1.astype(np.float32)
+
+
+class Generator:
+    """RNG that produces same outputs as torch.randn(..., device='cuda') on CPU"""
+
+    def __init__(self, seed):
+        self.seed = seed
+        self.offset = 0
+
+    def randn(self, shape):
+        """Generate a sequence of n standard normal random variables using the Philox 4x32 random number generator and the Box-Muller transform."""
+
+        n = 1
+        for x in shape:
+            n *= x
+
+        counter = np.zeros((4, n), dtype=np.uint32)
+        counter[0] = self.offset
+        counter[2] = np.arange(n, dtype=np.uint32)  # up to 2^32 numbers can be generated - if you want more you'd need to spill into counter[3]
+        self.offset += 1
+
+        key = uint32(np.array([[self.seed] * n], dtype=np.uint64))
+
+        g = philox4_32(counter, key)
+
+        return box_muller(g[0], g[1]).reshape(shape)  # discard g[2] and g[3]
diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py
index e0da3425..d72c1b5f 100644
--- a/modules/sd_samplers_kdiffusion.py
+++ b/modules/sd_samplers_kdiffusion.py
@@ -260,10 +260,7 @@ class TorchHijack:
             if noise.shape == x.shape:
                 return noise
 
-        if opts.randn_source == "CPU" or x.device.type == 'mps':
-            return torch.randn_like(x, device=devices.cpu).to(x.device)
-        else:
-            return torch.randn_like(x)
+        return devices.randn_like(x)
 
 
 class KDiffusionSampler:
diff --git a/modules/shared.py b/modules/shared.py
index aa72c9c8..7103b4ca 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -428,7 +428,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
     "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}).link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer"),
     "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
     "auto_vae_precision": OptionInfo(True, "Automaticlly revert VAE to 32-bit floats").info("triggers when a tensor with NaNs is produced in VAE; disabling the option in this case will result in a black square image"),
-    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors"),
+    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
 }))
 
 options_templates.update(options_section(('sdxl', "Stable Diffusion XL"), {
-- 
cgit v1.2.3


From f0c1063a707a4a43823b0ed00e2a8eeb22a9ed0a Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Fri, 4 Aug 2023 09:09:09 +0300
Subject: resolve some of circular import issues for kohaku

---
 modules/hypernetworks/hypernetwork.py          |  5 ++---
 modules/processing.py                          |  7 +------
 modules/sd_hijack.py                           |  6 +++---
 modules/sd_samplers_common.py                  | 10 ++++++++--
 modules/textual_inversion/textual_inversion.py |  4 +++-
 5 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index c4821d21..70f1cbd2 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -10,7 +10,7 @@ import torch
 import tqdm
 from einops import rearrange, repeat
 from ldm.util import default
-from modules import devices, processing, sd_models, shared, sd_samplers, hashes, sd_hijack_checkpoint, errors
+from modules import devices, sd_models, shared, sd_samplers, hashes, sd_hijack_checkpoint, errors
 from modules.textual_inversion import textual_inversion, logging
 from modules.textual_inversion.learn_schedule import LearnRateScheduler
 from torch import einsum
@@ -469,8 +469,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None,
 
 
 def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, use_weight, create_image_every, save_hypernetwork_every, template_filename, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
-    # images allows training previews to have infotext. Importing it at the top causes a circular import problem.
-    from modules import images
+    from modules import images, processing
 
     save_hypernetwork_every = save_hypernetwork_every or 0
     create_image_every = create_image_every or 0
diff --git a/modules/processing.py b/modules/processing.py
index 8f34c8b4..8086a2b0 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -30,6 +30,7 @@ from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
 
+decode_first_stage = sd_samplers_common.decode_first_stage
 
 # some of those options should not be changed at all because they would break the model, so I removed them from options.
 opt_C = 4
@@ -572,12 +573,6 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
     return samples
 
 
-def decode_first_stage(model, x):
-    x = model.decode_first_stage(x.to(devices.dtype_vae))
-
-    return x
-
-
 def get_fixed_seed(seed):
     if seed is None or seed == '' or seed == -1:
         return int(random.randrange(4294967294))
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index cfa5f0eb..609fd56c 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -2,7 +2,6 @@ import torch
 from torch.nn.functional import silu
 from types import MethodType
 
-import modules.textual_inversion.textual_inversion
 from modules import devices, sd_hijack_optimizations, shared, script_callbacks, errors, sd_unet
 from modules.hypernetworks import hypernetwork
 from modules.shared import cmd_opts
@@ -164,12 +163,13 @@ class StableDiffusionModelHijack:
     clip = None
     optimization_method = None
 
-    embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase()
-
     def __init__(self):
+        import modules.textual_inversion.textual_inversion
+
         self.extra_generation_params = {}
         self.comments = []
 
+        self.embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase()
         self.embedding_db.add_embedding_dir(cmd_opts.embeddings_dir)
 
     def apply_optimizations(self, option=None):
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 5deda761..b3d344e7 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -2,7 +2,7 @@ from collections import namedtuple
 import numpy as np
 import torch
 from PIL import Image
-from modules import devices, processing, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared
+from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared
 from modules.shared import opts, state
 
 SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
@@ -35,7 +35,7 @@ def single_sample_to_image(sample, approximation=None):
         x_sample = sample * 1.5
         x_sample = sd_vae_taesd.model()(x_sample.to(devices.device, devices.dtype).unsqueeze(0))[0].detach()
     else:
-        x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5
+        x_sample = decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5
 
     x_sample = torch.clamp(x_sample, min=0.0, max=1.0)
     x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
@@ -44,6 +44,12 @@ def single_sample_to_image(sample, approximation=None):
     return Image.fromarray(x_sample)
 
 
+def decode_first_stage(model, x):
+    x = model.decode_first_stage(x.to(devices.dtype_vae))
+
+    return x
+
+
 def sample_to_image(samples, index=0, approximation=None):
     return single_sample_to_image(samples[index], approximation)
 
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 4713bc2d..aa79dc09 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -13,7 +13,7 @@ import numpy as np
 from PIL import Image, PngImagePlugin
 from torch.utils.tensorboard import SummaryWriter
 
-from modules import shared, devices, sd_hijack, processing, sd_models, images, sd_samplers, sd_hijack_checkpoint, errors, hashes
+from modules import shared, devices, sd_hijack, sd_models, images, sd_samplers, sd_hijack_checkpoint, errors, hashes
 import modules.textual_inversion.dataset
 from modules.textual_inversion.learn_schedule import LearnRateScheduler
 
@@ -387,6 +387,8 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat
 
 
 def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, use_weight, create_image_every, save_embedding_every, template_filename, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+    from modules import processing
+
     save_embedding_every = save_embedding_every or 0
     create_image_every = create_image_every or 0
     template_file = textual_inversion_templates.get(template_filename, None)
-- 
cgit v1.2.3