From d3c86e5178725b11a4679097f0aefb0a9fc90014 Mon Sep 17 00:00:00 2001
From: Jared Deckard
Date: Wed, 14 Jun 2023 14:03:44 -0500
Subject: Note the Gradio user in the Exif data

---
 modules/processing.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index d22b353f..3e8d153e 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -180,6 +180,8 @@ class StableDiffusionProcessing:
         self.uc = None
         self.c = None
 
+        self.user = None
+
     @property
     def sd_model(self):
         return shared.sd_model
@@ -578,6 +580,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond,
         **p.extra_generation_params,
         "Version": program_version() if opts.add_version_to_infotext else None,
+        "User": p.user,
     }
 
     generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
-- cgit v1.2.3


From f603275d84301b5ee952683e951dd1aad72ba615 Mon Sep 17 00:00:00 2001
From: Jared Deckard
Date: Thu, 15 Jun 2023 10:55:53 -0500
Subject: Add an opt-in infotext user name setting

---
 modules/processing.py | 2 +-
 modules/shared.py     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 3e8d153e..a0cc8db2 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -580,7 +580,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond,
         **p.extra_generation_params,
         "Version": program_version() if opts.add_version_to_infotext else None,
-        "User": p.user,
+        "User": p.user if opts.add_user_name_to_info else None,
     }
 
     generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
diff --git a/modules/shared.py b/modules/shared.py
index 271a062d..4c639a21 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -496,6 +496,7 @@ options_templates.update(options_section(('ui', "User interface"), {
 options_templates.update(options_section(('infotext', "Infotext"), {
     "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"),
     "add_model_name_to_info": OptionInfo(True, "Add model name to generation information"),
+    "add_user_name_to_info": OptionInfo(False, "Add user name to generation information when authenticated"),
     "add_version_to_infotext": OptionInfo(True, "Add program version to generation information"),
     "disable_weights_auto_swap": OptionInfo(True, "When reading generation parameters from text into UI (from PNG info or pasted text), do not change the selected model/checkpoint."),
 }))
-- cgit v1.2.3
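Note: because this log is filtered to modules/processing.py, the code that actually fills in the new `user` field is not shown here. A minimal sketch of how a caller could populate it from an authenticated Gradio session follows; the `txt2img` wrapper is invented for illustration, and `gr.Request.username` is only set when the app runs with authentication enabled.

    # Hypothetical caller-side sketch, not part of the commits above.
    import gradio as gr
    from modules.processing import StableDiffusionProcessingTxt2Img

    def txt2img(prompt: str, request: gr.Request):
        p = StableDiffusionProcessingTxt2Img(prompt=prompt)
        # gr.Request.username is None unless Gradio auth is configured;
        # create_infotext() then emits a "User: ..." entry when the opt-in
        # add_user_name_to_info setting is enabled.
        p.user = request.username
        ...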
From 27e9e3f6fa41c10eb5256662ddf3643dee933810 Mon Sep 17 00:00:00 2001
From: stablegeniusdiffuser
Date: Mon, 19 Jun 2023 20:36:44 +0200
Subject: Add use_main_prompt parameter to use proper metadata for batch run grids or individual images

---
 modules/processing.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 8da73884..1d97e95e 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -549,7 +549,7 @@ def program_version():
     return res
 
 
-def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0):
+def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0, use_main_prompt=False):
     index = position_in_batch + iteration * p.batch_size
 
     clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers)
@@ -589,9 +589,10 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
 
     generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
 
+    prompt_text = p.prompt if use_main_prompt else all_prompts[index]
     negative_prompt_text = f"\nNegative prompt: {p.all_negative_prompts[index]}" if p.all_negative_prompts[index] else ""
 
-    return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()
+    return f"{prompt_text}{negative_prompt_text}\n{generation_params_text}".strip()
 
 
 def process_images(p: StableDiffusionProcessing) -> Processed:
@@ -663,8 +664,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
         else:
             p.all_subseeds = [int(subseed) + x for x in range(len(p.all_prompts))]
 
-    def infotext(iteration=0, position_in_batch=0):
-        return create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments, iteration, position_in_batch)
+    def infotext(iteration=0, position_in_batch=0, use_main_prompt=False):
+        return create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments, iteration, position_in_batch, use_main_prompt)
 
     if os.path.exists(cmd_opts.embeddings_dir) and not p.do_not_reload_embeddings:
         model_hijack.embedding_db.load_textual_inversion_embeddings()
@@ -824,7 +825,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             grid = images.image_grid(output_images, p.batch_size)
 
             if opts.return_grid:
-                text = infotext()
+                text = infotext(use_main_prompt=True)
                 infotexts.insert(0, text)
                 if opts.enable_pnginfo:
                     grid.info["parameters"] = text
@@ -832,7 +833,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
                 index_of_first_image = 1
 
             if opts.grid_save:
-                images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True)
+                images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(use_main_prompt=True), short_filename=not opts.grid_extended_filename, p=p, grid=True)
 
     if not p.disable_extra_networks and p.extra_network_data:
         extra_networks.deactivate(p, p.extra_network_data)
-- cgit v1.2.3
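Note: the effect of `use_main_prompt` is easiest to see when the per-image prompts differ from the prompt the user typed, e.g. when a batch script rewrites them. The values below are invented for illustration and are not part of the commit:

    # Hypothetical batch where a script rewrote the per-image prompts:
    p.prompt = "masterpiece, a landscape"                 # main prompt as typed
    p.all_prompts = ["masterpiece, a landscape, dawn",    # per-image prompts,
                     "masterpiece, a landscape, dusk"]    # rewritten by a script

    infotext()                      # image 0 metadata starts "masterpiece, a landscape, dawn"
    infotext(use_main_prompt=True)  # grid metadata starts   "masterpiece, a landscape"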
From b0ec69b360835a901a1aa57df1f7c8c9d55bf31c Mon Sep 17 00:00:00 2001
From: hako-mikan <122196982+hako-mikan@users.noreply.github.com>
Date: Wed, 28 Jun 2023 18:37:08 +0900
Subject: add 'before_hr callback' script callback

---
 modules/processing.py |  3 +++
 modules/scripts.py    | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 8da73884..35463c37 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1074,6 +1074,9 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio(for_hr=True))
 
+        if self.scripts is not None:
+            self.scripts.before_hr(self)
+
         samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)
 
         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())
diff --git a/modules/scripts.py b/modules/scripts.py
index 99bf836a..6485f398 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -186,6 +186,11 @@ class Script:
 
         return f'script_{tabname}{title}_{item_id}'
 
+    def before_hr(self, p, *args):
+        """
+        This function is called before the hires fix pass starts.
+        """
+        pass
 
 
 current_basedir = paths.script_path
@@ -548,6 +553,15 @@ class ScriptRunner:
 
             self.scripts[si].args_to = args_to
 
+    def before_hr(self, p):
+        for script in self.alwayson_scripts:
+            try:
+                script_args = p.script_args[script.args_from:script.args_to]
+                script.before_hr(p, *script_args)
+            except Exception:
+                errors.report(f"Error running before_hr: {script.filename}", exc_info=True)
+
+
 scripts_txt2img: ScriptRunner = None
 scripts_img2img: ScriptRunner = None
 scripts_postproc: scripts_postprocessing.ScriptPostprocessingRunner = None
-- cgit v1.2.3
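Note: a minimal always-on extension script using the new hook might look like the sketch below. The file name and class are hypothetical, but the `before_hr` signature matches the `Script` method added above, and `scripts.AlwaysVisible` is the existing marker for always-on scripts.

    # scripts/hires_logger.py - hypothetical always-on extension script
    import modules.scripts as scripts

    class HiresLogger(scripts.Script):
        def title(self):
            return "Hires logger"

        def show(self, is_img2img):
            return scripts.AlwaysVisible  # run as an always-on script

        def before_hr(self, p, *args):
            # called right before the hires fix second pass is sampled
            print(f"hires pass starting: {p.hr_second_pass_steps or p.steps} steps")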
From 7f46f81dd7b517e829395734750f0eb8360675d4 Mon Sep 17 00:00:00 2001
From: missionfloyd
Date: Sat, 1 Jul 2023 17:20:56 -0600
Subject: Change default seed_resize to 0

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 8da73884..9e838aad 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -109,7 +109,7 @@ class StableDiffusionProcessing:
     cached_uc = [None, None]
     cached_c = [None, None]
 
-    def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_min_uncond: float = 0.0, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None):
+    def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = 0, seed_resize_from_w: int = 0, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_min_uncond: float = 0.0, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None):
         if sampler_index is not None:
             print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)
-- cgit v1.2.3


From f731a728c68035ee36317ed0096ac5ecbfd50553 Mon Sep 17 00:00:00 2001
From: missionfloyd
Date: Mon, 3 Jul 2023 11:41:10 -0600
Subject: Check seed_resize_from <= 0

---
 modules/processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 9e838aad..dc552121 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -109,7 +109,7 @@ class StableDiffusionProcessing:
     cached_uc = [None, None]
     cached_c = [None, None]
 
-    def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = 0, seed_resize_from_w: int = 0, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_min_uncond: float = 0.0, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None):
+    def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_min_uncond: float = 0.0, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None):
         if sampler_index is not None:
             print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)
 
@@ -573,7 +573,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
         "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
         "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
-        "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
+        "Seed resize from": (None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else
f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), "Denoising strength": getattr(p, 'denoising_strength', None), "Conditional mask weight": getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) if p.is_using_inpainting_conditioning else None, "Clip skip": None if clip_skip <= 1 else clip_skip, -- cgit v1.2.3 From da468a585bb631bc91c3435f349dfb7ce7fe3895 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 3 Jul 2023 12:17:20 +0300 Subject: Fix typo: checkpoint_alisases --- modules/api/api.py | 4 ++-- modules/processing.py | 2 +- modules/sd_models.py | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/api/api.py b/modules/api/api.py index 224bbfc6..5793bb44 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -22,7 +22,7 @@ from modules.textual_inversion.textual_inversion import create_embedding, train_ from modules.textual_inversion.preprocess import preprocess from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork from PIL import PngImagePlugin,Image -from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights, checkpoint_alisases +from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights, checkpoint_aliases from modules.sd_vae import vae_dict from modules.sd_models_config import find_checkpoint_config_near_filename from modules.realesrgan_model import get_realesrgan_models @@ -519,7 +519,7 @@ class Api: def set_config(self, req: Dict[str, Any]): checkpoint_name = req.get("sd_model_checkpoint", None) - if checkpoint_name is not None and checkpoint_name not in checkpoint_alisases: + if checkpoint_name is not None and checkpoint_name not in checkpoint_aliases: raise RuntimeError(f"model {checkpoint_name!r} not found") for k, v in req.items(): diff --git a/modules/processing.py b/modules/processing.py index 21d1492c..cd568a20 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -606,7 +606,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: try: # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint - if sd_models.checkpoint_alisases.get(p.override_settings.get('sd_model_checkpoint')) is None: + if sd_models.checkpoint_aliases.get(p.override_settings.get('sd_model_checkpoint')) is None: p.override_settings.pop('sd_model_checkpoint', None) sd_models.reload_model_weights() diff --git a/modules/sd_models.py b/modules/sd_models.py index 653c4cc0..060e0007 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -23,7 +23,8 @@ model_dir = "Stable-diffusion" model_path = os.path.abspath(os.path.join(paths.models_path, model_dir)) checkpoints_list = {} -checkpoint_alisases = {} +checkpoint_aliases = {} +checkpoint_alisases = checkpoint_aliases # for compatibility with old name checkpoints_loaded = collections.OrderedDict() @@ -66,7 +67,7 @@ class CheckpointInfo: def register(self): checkpoints_list[self.title] = self for id in self.ids: - checkpoint_alisases[id] = self + checkpoint_aliases[id] = self def calculate_shorthash(self): self.sha256 = hashes.sha256(self.filename, f"checkpoint/{self.name}") @@ -112,7 +113,7 @@ def checkpoint_tiles(): def list_models(): checkpoints_list.clear() - checkpoint_alisases.clear() + checkpoint_aliases.clear() cmd_ckpt = shared.cmd_opts.ckpt if shared.cmd_opts.no_download_sd_model or cmd_ckpt != shared.sd_model_file or os.path.exists(cmd_ckpt): @@ -136,7 +137,7 @@ def 
list_models(): def get_closet_checkpoint_match(search_string): - checkpoint_info = checkpoint_alisases.get(search_string, None) + checkpoint_info = checkpoint_aliases.get(search_string, None) if checkpoint_info is not None: return checkpoint_info @@ -166,7 +167,7 @@ def select_checkpoint(): """Raises `FileNotFoundError` if no checkpoints are found.""" model_checkpoint = shared.opts.sd_model_checkpoint - checkpoint_info = checkpoint_alisases.get(model_checkpoint, None) + checkpoint_info = checkpoint_aliases.get(model_checkpoint, None) if checkpoint_info is not None: return checkpoint_info -- cgit v1.2.3 From da464a3fb39ecc6ea7b22fe87271194480d8501c Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Wed, 12 Jul 2023 23:52:43 +0300 Subject: SDXL support --- modules/launch_utils.py | 17 +++++++++++++ modules/lowvram.py | 51 +++++++++++++++++++++++++++----------- modules/paths.py | 9 ++++++- modules/processing.py | 7 ++++-- modules/prompt_parser.py | 23 ++++++++++++++--- modules/sd_hijack.py | 23 ++++++++++++++++- modules/sd_hijack_clip.py | 16 +++++++++--- modules/sd_hijack_open_clip.py | 38 +++++++++++++++++++++++++--- modules/sd_hijack_optimizations.py | 51 ++++++++++++++++++++++++++++++++------ modules/sd_models.py | 14 +++++++++-- modules/sd_models_config.py | 5 +++- modules/sd_models_xl.py | 27 +++++++++++++++++--- modules/sd_samplers_kdiffusion.py | 2 +- modules/shared.py | 2 ++ requirements.txt | 1 + requirements_versions.txt | 1 + 16 files changed, 242 insertions(+), 45 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/launch_utils.py b/modules/launch_utils.py index 3b740dbd..aa9d1880 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -224,6 +224,20 @@ def run_extensions_installers(settings_file): run_extension_installer(os.path.join(extensions_dir, dirname_extension)) +def mute_sdxl_imports(): + """create fake modules that SDXL wants to import but doesn't actually use for our purposes""" + + import importlib + + module = importlib.util.module_from_spec(importlib.machinery.ModuleSpec('taming.modules.losses.lpips', None)) + module.LPIPS = None + sys.modules['taming.modules.losses.lpips'] = module + + module = importlib.util.module_from_spec(importlib.machinery.ModuleSpec('sgm.data', None)) + module.StableDataModuleFromConfig = None + sys.modules['sgm.data'] = module + + def prepare_environment(): torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu118") torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.0.1 torchvision==0.15.2 --extra-index-url {torch_index_url}") @@ -319,11 +333,14 @@ def prepare_environment(): if args.update_all_extensions: git_pull_recursive(extensions_dir) + mute_sdxl_imports() + if "--exit" in sys.argv: print("Exiting because of --exit argument") exit(0) + def configure_for_tests(): if "--api" not in sys.argv: sys.argv.append("--api") diff --git a/modules/lowvram.py b/modules/lowvram.py index d95bcfbf..da4f33a8 100644 --- a/modules/lowvram.py +++ b/modules/lowvram.py @@ -53,19 +53,46 @@ def setup_for_low_vram(sd_model, use_medvram): send_me_to_gpu(first_stage_model, None) return first_stage_model_decode(z) - # for SD1, cond_stage_model is CLIP and its NN is in the tranformer frield, but for SD2, it's open clip, and it's in model field - if hasattr(sd_model.cond_stage_model, 'model'): - sd_model.cond_stage_model.transformer = sd_model.cond_stage_model.model - - # remove several big modules: cond, first_stage, depth/embedder (if applicable), and unet 
from the model and then - # send the model to GPU. Then put modules back. the modules will be in CPU. - stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, getattr(sd_model, 'depth_model', None), getattr(sd_model, 'embedder', None), sd_model.model - sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.depth_model, sd_model.embedder, sd_model.model = None, None, None, None, None + to_remain_in_cpu = [ + (sd_model, 'first_stage_model'), + (sd_model, 'depth_model'), + (sd_model, 'embedder'), + (sd_model, 'model'), + (sd_model, 'embedder'), + ] + + is_sdxl = hasattr(sd_model, 'conditioner') + is_sd2 = not is_sdxl and hasattr(sd_model.cond_stage_model, 'model') + + if is_sdxl: + to_remain_in_cpu.append((sd_model, 'conditioner')) + elif is_sd2: + to_remain_in_cpu.append((sd_model.cond_stage_model, 'model')) + else: + to_remain_in_cpu.append((sd_model.cond_stage_model, 'transformer')) + + # remove several big modules: cond, first_stage, depth/embedder (if applicable), and unet from the model + stored = [] + for obj, field in to_remain_in_cpu: + module = getattr(obj, field, None) + stored.append(module) + setattr(obj, field, None) + + # send the model to GPU. sd_model.to(devices.device) - sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.depth_model, sd_model.embedder, sd_model.model = stored + + # put modules back. the modules will be in CPU. + for (obj, field), module in zip(to_remain_in_cpu, stored): + setattr(obj, field, module) # register hooks for those the first three models - sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu) + if is_sdxl: + sd_model.conditioner.register_forward_pre_hook(send_me_to_gpu) + elif is_sd2: + sd_model.cond_stage_model.model.register_forward_pre_hook(send_me_to_gpu) + else: + sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu) + sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu) sd_model.first_stage_model.encode = first_stage_model_encode_wrap sd_model.first_stage_model.decode = first_stage_model_decode_wrap @@ -75,10 +102,6 @@ def setup_for_low_vram(sd_model, use_medvram): sd_model.embedder.register_forward_pre_hook(send_me_to_gpu) parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model - if hasattr(sd_model.cond_stage_model, 'model'): - sd_model.cond_stage_model.model = sd_model.cond_stage_model.transformer - del sd_model.cond_stage_model.transformer - if use_medvram: sd_model.model.register_forward_pre_hook(send_me_to_gpu) else: diff --git a/modules/paths.py b/modules/paths.py index f509a85f..1100a8dc 100644 --- a/modules/paths.py +++ b/modules/paths.py @@ -20,7 +20,7 @@ assert sd_path is not None, f"Couldn't find Stable Diffusion in any of: {possibl path_dirs = [ (sd_path, 'ldm', 'Stable Diffusion', []), - (os.path.join(sd_path, '../generative-models'), 'sgm', 'Stable Diffusion XL', []), + (os.path.join(sd_path, '../generative-models'), 'sgm', 'Stable Diffusion XL', ["sgm"]), (os.path.join(sd_path, '../CodeFormer'), 'inference_codeformer.py', 'CodeFormer', []), (os.path.join(sd_path, '../BLIP'), 'models/blip.py', 'BLIP', []), (os.path.join(sd_path, '../k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]), @@ -36,6 +36,13 @@ for d, must_exist, what, options in path_dirs: d = os.path.abspath(d) if "atstart" in options: sys.path.insert(0, d) + elif "sgm" in options: + # Stable Diffusion XL repo has scripts dir with __init__.py in it which ruins every extension's scripts dir, so we 
+        # import sgm and remove it from sys.path so that when a script imports scripts.something, it doesn't use sgm's scripts dir.
+
+        sys.path.insert(0, d)
+        import sgm
+        sys.path.pop(0)
     else:
         sys.path.append(d)
     paths[what] = d
diff --git a/modules/processing.py b/modules/processing.py
index cd568a20..85d35423 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -343,10 +343,13 @@ class StableDiffusionProcessing:
         return cache[1]
 
     def setup_conds(self):
+        prompts = prompt_parser.SdConditioning(self.prompts, width=self.width, height=self.height)
+        negative_prompts = prompt_parser.SdConditioning(self.negative_prompts, width=self.width, height=self.height)
+
         sampler_config = sd_samplers.find_sampler_config(self.sampler_name)
         self.step_multiplier = 2 if sampler_config and sampler_config.options.get("second_order", False) else 1
-        self.uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, self.negative_prompts, self.steps * self.step_multiplier, [self.cached_uc], self.extra_network_data)
-        self.c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, self.prompts, self.steps * self.step_multiplier, [self.cached_c], self.extra_network_data)
+        self.uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, negative_prompts, self.steps * self.step_multiplier, [self.cached_uc], self.extra_network_data)
+        self.c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, prompts, self.steps * self.step_multiplier, [self.cached_c], self.extra_network_data)
 
     def parse_extra_network_prompts(self):
         self.prompts, self.extra_network_data = extra_networks.parse_prompts(self.prompts)
diff --git a/modules/prompt_parser.py b/modules/prompt_parser.py
index d7f9e9a9..33810669 100644
--- a/modules/prompt_parser.py
+++ b/modules/prompt_parser.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import re
 from collections import namedtuple
 from typing import List
@@ -109,7 +111,19 @@ def get_learned_conditioning_prompt_schedules(prompts, steps):
 ScheduledPromptConditioning = namedtuple("ScheduledPromptConditioning", ["end_at_step", "cond"])
 
 
-def get_learned_conditioning(model, prompts, steps):
+class SdConditioning(list):
+    """
+    A list with prompts for stable diffusion's conditioner model.
+    Can also specify width and height of created image - SDXL needs it.
+    """
+    def __init__(self, prompts, width=None, height=None):
+        super().__init__()
+        self.extend(prompts)
+        self.width = width or getattr(prompts, 'width', None)
+        self.height = height or getattr(prompts, 'height', None)
+
+
+def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps):
     """converts a list of prompts into a list of prompt schedules - each schedule is a list of ScheduledPromptConditioning, specifying the condition (cond),
     and the sampling step at which this condition is to be replaced by the next one.
@@ -160,11 +174,13 @@ def get_learned_conditioning(model, prompts, steps): re_AND = re.compile(r"\bAND\b") re_weight = re.compile(r"^(.*?)(?:\s*:\s*([-+]?(?:\d+\.?|\d*\.\d+)))?\s*$") -def get_multicond_prompt_list(prompts): + +def get_multicond_prompt_list(prompts: SdConditioning | list[str]): res_indexes = [] - prompt_flat_list = [] prompt_indexes = {} + prompt_flat_list = SdConditioning(prompts) + prompt_flat_list.clear() for prompt in prompts: subprompts = re_AND.split(prompt) @@ -201,6 +217,7 @@ class MulticondLearnedConditioning: self.shape: tuple = shape # the shape field is needed to send this object to DDIM/PLMS self.batch: List[List[ComposableScheduledPromptConditioning]] = batch + def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearnedConditioning: """same as get_learned_conditioning, but returns a list of ScheduledPromptConditioning along with the weight objects for each prompt. For each prompt, the list is obtained by splitting the prompt using the AND separator. diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index c4b9211f..266811f9 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -15,6 +15,11 @@ import ldm.models.diffusion.ddim import ldm.models.diffusion.plms import ldm.modules.encoders.modules +import sgm.modules.attention +import sgm.modules.diffusionmodules.model +import sgm.modules.diffusionmodules.openaimodel +import sgm.modules.encoders.modules + attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward @@ -56,6 +61,9 @@ def apply_optimizations(option=None): ldm.modules.diffusionmodules.model.nonlinearity = silu ldm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th + sgm.modules.diffusionmodules.model.nonlinearity = silu + sgm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th + if current_optimizer is not None: current_optimizer.undo() current_optimizer = None @@ -89,6 +97,10 @@ def undo_optimizations(): ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward + sgm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity + sgm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward + sgm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward + def fix_checkpoint(): """checkpoints are now added and removed in embedding/hypernet code, since torch doesn't want @@ -170,10 +182,19 @@ class StableDiffusionModelHijack: if conditioner: for i in range(len(conditioner.embedders)): embedder = conditioner.embedders[i] - if type(embedder).__name__ == 'FrozenOpenCLIPEmbedder': + typename = type(embedder).__name__ + if typename == 'FrozenOpenCLIPEmbedder': embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self) m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(embedder, self) conditioner.embedders[i] = m.cond_stage_model + if typename == 'FrozenCLIPEmbedder': + model_embeddings = m.cond_stage_model.transformer.text_model.embeddings + model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self) + m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(embedder, self) + 
conditioner.embedders[i] = m.cond_stage_model
+                if typename == 'FrozenOpenCLIPEmbedder2':
+                    embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self)
+                    conditioner.embedders[i] = sd_hijack_open_clip.FrozenOpenCLIPEmbedder2WithCustomWords(embedder, self)
 
         if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation:
             model_embeddings = m.cond_stage_model.roberta.embeddings
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 3b5a7666..6c17a81d 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -42,6 +42,10 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         self.hijack: sd_hijack.StableDiffusionModelHijack = hijack
         self.chunk_length = 75
 
+        self.is_trainable = getattr(wrapped, 'is_trainable', False)
+        self.input_key = getattr(wrapped, 'input_key', 'txt')
+        self.legacy_ucg_val = None
+
     def empty_chunk(self):
         """creates an empty PromptChunk and returns it"""
 
@@ -199,8 +203,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         """
         Accepts an array of texts; Passes texts through transformers network to create a tensor with numerical representation of those texts.
         Returns a tensor with shape of (B, T, C), where B is length of the array; T is length, in tokens, of texts (including padding) - T will
-        be a multiple of 77; and C is dimensionality of each token - for SD1 it's 768, and for SD2 it's 1024.
+        be a multiple of 77; and C is dimensionality of each token - for SD1 it's 768, for SD2 it's 1024, and for SDXL it's 1280.
         An example shape returned by this function can be: (2, 77, 768).
+        For SDXL, instead of returning one tensor as above, it returns a tuple of two: the second one with shape (B, 1280) holding pooled values.
         Webui usually sends just one text at a time through this function - the only time when texts is an array with more than one element is when you do
         prompt editing: "a picture of a [cat:dog:0.4] eating ice cream"
         """
@@ -233,7 +238,10 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
             embeddings_list = ", ".join([f'{name} [{embedding.checksum()}]' for name, embedding in used_embeddings.items()])
             self.hijack.comments.append(f"Used embeddings: {embeddings_list}")
 
-        return torch.hstack(zs)
+        if getattr(self.wrapped, 'return_pooled', False):
+            return torch.hstack(zs), zs[0].pooled
+        else:
+            return torch.hstack(zs)
 
     def process_tokens(self, remade_batch_tokens, batch_multipliers):
         """
@@ -256,9 +264,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
             # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
             batch_multipliers = torch.asarray(batch_multipliers).to(devices.device)
             original_mean = z.mean()
-            z = z * batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
+            z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
             new_mean = z.mean()
-            z = z * (original_mean / new_mean)
+            z *= (original_mean / new_mean)
 
         return z
 
diff --git a/modules/sd_hijack_open_clip.py b/modules/sd_hijack_open_clip.py
index 6ac5bda6..fcf5ad07 100644
--- a/modules/sd_hijack_open_clip.py
+++ b/modules/sd_hijack_open_clip.py
@@ -16,10 +16,6 @@ class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWit
         self.id_end = tokenizer.encoder["<end_of_text>"]
         self.id_pad = 0
 
-        self.is_trainable = getattr(wrapped, 'is_trainable', False)
-        self.input_key = getattr(wrapped, 'input_key', 'txt')
-        self.legacy_ucg_val = None
-
     def tokenize(self, texts):
         assert not
opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'
 
@@ -39,3 +35,37 @@ class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWit
         embedded = self.wrapped.model.token_embedding.wrapped(ids).squeeze(0)
 
         return embedded
+
+
+class FrozenOpenCLIPEmbedder2WithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
+    def __init__(self, wrapped, hijack):
+        super().__init__(wrapped, hijack)
+
+        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ','][0]
+        self.id_start = tokenizer.encoder["<start_of_text>"]
+        self.id_end = tokenizer.encoder["<end_of_text>"]
+        self.id_pad = 0
+
+    def tokenize(self, texts):
+        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'
+
+        tokenized = [tokenizer.encode(text) for text in texts]
+
+        return tokenized
+
+    def encode_with_transformers(self, tokens):
+        d = self.wrapped.encode_with_transformer(tokens)
+        z = d[self.wrapped.layer]
+
+        pooled = d.get("pooled")
+        if pooled is not None:
+            z.pooled = pooled
+
+        return z
+
+    def encode_embedding_init_text(self, init_text, nvpt):
+        ids = tokenizer.encode(init_text)
+        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
+        embedded = self.wrapped.model.token_embedding.wrapped(ids).squeeze(0)
+
+        return embedded
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 53e27ade..e99c9ba5 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -14,7 +14,11 @@ from modules.hypernetworks import hypernetwork
 import ldm.modules.attention
 import ldm.modules.diffusionmodules.model
 
+import sgm.modules.attention
+import sgm.modules.diffusionmodules.model
+
 diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward
+sgm_diffusionmodules_model_AttnBlock_forward = sgm.modules.diffusionmodules.model.AttnBlock.forward
 
 
 class SdOptimization:
@@ -39,6 +43,9 @@ class SdOptimization:
         ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
         ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward
 
+        sgm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
+        sgm.modules.diffusionmodules.model.AttnBlock.forward = sgm_diffusionmodules_model_AttnBlock_forward
+
 
 class SdOptimizationXformers(SdOptimization):
     name = "xformers"
@@ -51,6 +58,8 @@ class SdOptimizationXformers(SdOptimization):
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = xformers_attention_forward
         ldm.modules.diffusionmodules.model.AttnBlock.forward = xformers_attnblock_forward
+        sgm.modules.attention.CrossAttention.forward = xformers_attention_forward
+        sgm.modules.diffusionmodules.model.AttnBlock.forward = xformers_attnblock_forward
 
 
 class SdOptimizationSdpNoMem(SdOptimization):
@@ -65,6 +74,8 @@ class SdOptimizationSdpNoMem(SdOptimization):
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = scaled_dot_product_no_mem_attention_forward
         ldm.modules.diffusionmodules.model.AttnBlock.forward = sdp_no_mem_attnblock_forward
+        sgm.modules.attention.CrossAttention.forward = scaled_dot_product_no_mem_attention_forward
+        sgm.modules.diffusionmodules.model.AttnBlock.forward = sdp_no_mem_attnblock_forward
 
 
 class SdOptimizationSdp(SdOptimizationSdpNoMem):
@@ -76,6 +87,8 @@ class SdOptimizationSdp(SdOptimizationSdpNoMem):
     def apply(self):
         ldm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward
ldm.modules.diffusionmodules.model.AttnBlock.forward = sdp_attnblock_forward + sgm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward + sgm.modules.diffusionmodules.model.AttnBlock.forward = sdp_attnblock_forward class SdOptimizationSubQuad(SdOptimization): @@ -86,6 +99,8 @@ class SdOptimizationSubQuad(SdOptimization): def apply(self): ldm.modules.attention.CrossAttention.forward = sub_quad_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sub_quad_attnblock_forward + sgm.modules.attention.CrossAttention.forward = sub_quad_attention_forward + sgm.modules.diffusionmodules.model.AttnBlock.forward = sub_quad_attnblock_forward class SdOptimizationV1(SdOptimization): @@ -94,9 +109,9 @@ class SdOptimizationV1(SdOptimization): cmd_opt = "opt_split_attention_v1" priority = 10 - def apply(self): ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_v1 + sgm.modules.attention.CrossAttention.forward = split_cross_attention_forward_v1 class SdOptimizationInvokeAI(SdOptimization): @@ -109,6 +124,7 @@ class SdOptimizationInvokeAI(SdOptimization): def apply(self): ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_invokeAI + sgm.modules.attention.CrossAttention.forward = split_cross_attention_forward_invokeAI class SdOptimizationDoggettx(SdOptimization): @@ -119,6 +135,8 @@ class SdOptimizationDoggettx(SdOptimization): def apply(self): ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = cross_attention_attnblock_forward + sgm.modules.attention.CrossAttention.forward = split_cross_attention_forward + sgm.modules.diffusionmodules.model.AttnBlock.forward = cross_attention_attnblock_forward def list_optimizers(res): @@ -155,7 +173,7 @@ def get_available_vram(): # see https://github.com/basujindal/stable-diffusion/pull/117 for discussion -def split_cross_attention_forward_v1(self, x, context=None, mask=None): +def split_cross_attention_forward_v1(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): h = self.heads q_in = self.to_q(x) @@ -196,7 +214,7 @@ def split_cross_attention_forward_v1(self, x, context=None, mask=None): # taken from https://github.com/Doggettx/stable-diffusion and modified -def split_cross_attention_forward(self, x, context=None, mask=None): +def split_cross_attention_forward(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): h = self.heads q_in = self.to_q(x) @@ -262,11 +280,13 @@ def split_cross_attention_forward(self, x, context=None, mask=None): # -- Taken from https://github.com/invoke-ai/InvokeAI and modified -- mem_total_gb = psutil.virtual_memory().total // (1 << 30) + def einsum_op_compvis(q, k, v): s = einsum('b i d, b j d -> b i j', q, k) s = s.softmax(dim=-1, dtype=s.dtype) return einsum('b i j, b j d -> b i d', s, v) + def einsum_op_slice_0(q, k, v, slice_size): r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) for i in range(0, q.shape[0], slice_size): @@ -274,6 +294,7 @@ def einsum_op_slice_0(q, k, v, slice_size): r[i:end] = einsum_op_compvis(q[i:end], k[i:end], v[i:end]) return r + def einsum_op_slice_1(q, k, v, slice_size): r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) for i in range(0, q.shape[1], slice_size): @@ -281,6 +302,7 @@ def einsum_op_slice_1(q, k, v, slice_size): r[:, i:end] = einsum_op_compvis(q[:, i:end], k, v) return r + def 
einsum_op_mps_v1(q, k, v): if q.shape[0] * q.shape[1] <= 2**16: # (512x512) max q.shape[1]: 4096 return einsum_op_compvis(q, k, v) @@ -290,12 +312,14 @@ def einsum_op_mps_v1(q, k, v): slice_size -= 1 return einsum_op_slice_1(q, k, v, slice_size) + def einsum_op_mps_v2(q, k, v): if mem_total_gb > 8 and q.shape[0] * q.shape[1] <= 2**16: return einsum_op_compvis(q, k, v) else: return einsum_op_slice_0(q, k, v, 1) + def einsum_op_tensor_mem(q, k, v, max_tensor_mb): size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20) if size_mb <= max_tensor_mb: @@ -305,6 +329,7 @@ def einsum_op_tensor_mem(q, k, v, max_tensor_mb): return einsum_op_slice_0(q, k, v, q.shape[0] // div) return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1)) + def einsum_op_cuda(q, k, v): stats = torch.cuda.memory_stats(q.device) mem_active = stats['active_bytes.all.current'] @@ -315,6 +340,7 @@ def einsum_op_cuda(q, k, v): # Divide factor of safety as there's copying and fragmentation return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) + def einsum_op(q, k, v): if q.device.type == 'cuda': return einsum_op_cuda(q, k, v) @@ -328,7 +354,8 @@ def einsum_op(q, k, v): # Tested on i7 with 8MB L3 cache. return einsum_op_tensor_mem(q, k, v, 32) -def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): + +def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): h = self.heads q = self.to_q(x) @@ -356,7 +383,7 @@ def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): # Based on Birch-san's modified implementation of sub-quadratic attention from https://github.com/Birch-san/diffusers/pull/1 # The sub_quad_attention_forward function is under the MIT License listed under Memory Efficient Attention in the Licenses section of the web UI interface -def sub_quad_attention_forward(self, x, context=None, mask=None): +def sub_quad_attention_forward(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): assert mask is None, "attention-mask not currently implemented for SubQuadraticCrossAttnProcessor." 
h = self.heads @@ -392,6 +419,7 @@ def sub_quad_attention_forward(self, x, context=None, mask=None): return x + def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_size_min=None, chunk_threshold=None, use_checkpoint=True): bytes_per_token = torch.finfo(q.dtype).bits//8 batch_x_heads, q_tokens, _ = q.shape @@ -442,7 +470,7 @@ def get_xformers_flash_attention_op(q, k, v): return None -def xformers_attention_forward(self, x, context=None, mask=None): +def xformers_attention_forward(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): h = self.heads q_in = self.to_q(x) context = default(context, x) @@ -465,9 +493,10 @@ def xformers_attention_forward(self, x, context=None, mask=None): out = rearrange(out, 'b n h d -> b n (h d)', h=h) return self.to_out(out) + # Based on Diffusers usage of scaled dot product attention from https://github.com/huggingface/diffusers/blob/c7da8fd23359a22d0df2741688b5b4f33c26df21/src/diffusers/models/cross_attention.py # The scaled_dot_product_attention_forward function contains parts of code under Apache-2.0 license listed under Scaled Dot Product Attention in the Licenses section of the web UI interface -def scaled_dot_product_attention_forward(self, x, context=None, mask=None): +def scaled_dot_product_attention_forward(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): batch_size, sequence_length, inner_dim = x.shape if mask is not None: @@ -507,10 +536,12 @@ def scaled_dot_product_attention_forward(self, x, context=None, mask=None): hidden_states = self.to_out[1](hidden_states) return hidden_states -def scaled_dot_product_no_mem_attention_forward(self, x, context=None, mask=None): + +def scaled_dot_product_no_mem_attention_forward(self, x, context=None, mask=None, additional_tokens=None, n_times_crossframe_attn_in_self=0): with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=False): return scaled_dot_product_attention_forward(self, x, context, mask) + def cross_attention_attnblock_forward(self, x): h_ = x h_ = self.norm(h_) @@ -569,6 +600,7 @@ def cross_attention_attnblock_forward(self, x): return h3 + def xformers_attnblock_forward(self, x): try: h_ = x @@ -592,6 +624,7 @@ def xformers_attnblock_forward(self, x): except NotImplementedError: return cross_attention_attnblock_forward(self, x) + def sdp_attnblock_forward(self, x): h_ = x h_ = self.norm(h_) @@ -612,10 +645,12 @@ def sdp_attnblock_forward(self, x): out = self.proj_out(out) return x + out + def sdp_no_mem_attnblock_forward(self, x): with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=False): return sdp_attnblock_forward(self, x) + def sub_quad_attnblock_forward(self, x): h_ = x h_ = self.norm(h_) diff --git a/modules/sd_models.py b/modules/sd_models.py index 8d639583..e4aae597 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -411,6 +411,7 @@ def repair_config(sd_config): sd1_clip_weight = 'cond_stage_model.transformer.text_model.embeddings.token_embedding.weight' sd2_clip_weight = 'cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight' +sdxl_clip_weight = 'conditioner.embedders.1.model.ln_final.weight' class SdModelData: @@ -445,6 +446,15 @@ class SdModelData: model_data = SdModelData() +def get_empty_cond(sd_model): + if hasattr(sd_model, 'conditioner'): + d = sd_model.get_learned_conditioning([""]) + return d['crossattn'] + else: + return sd_model.cond_stage_model([""]) + + + def 
load_model(checkpoint_info=None, already_loaded_state_dict=None): from modules import lowvram, sd_hijack checkpoint_info = checkpoint_info or select_checkpoint() @@ -465,7 +475,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): state_dict = get_checkpoint_state_dict(checkpoint_info, timer) checkpoint_config = sd_models_config.find_checkpoint_config(state_dict, checkpoint_info) - clip_is_included_into_sd = sd1_clip_weight in state_dict or sd2_clip_weight in state_dict + clip_is_included_into_sd = sd1_clip_weight in state_dict or sd2_clip_weight in state_dict or sdxl_clip_weight in state_dict timer.record("find config") @@ -517,7 +527,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): timer.record("scripts callbacks") with devices.autocast(), torch.no_grad(): - sd_model.cond_stage_model_empty_prompt = sd_model.cond_stage_model([""]) + sd_model.cond_stage_model_empty_prompt = get_empty_cond(sd_model) timer.record("calculate empty prompt") diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index 96501569..2e92479a 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -14,6 +14,7 @@ config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml") config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml") config_sd2v = os.path.join(sd_xl_repo_configs_path, "sd_2_1_768.yaml") config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml") +config_sdxl = os.path.join(sd_xl_repo_configs_path, "sd_xl_base.yaml") config_depth_model = os.path.join(sd_repo_configs_path, "v2-midas-inference.yaml") config_unclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-l-inference.yaml") config_unopenclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-h-inference.yaml") @@ -70,7 +71,9 @@ def guess_model_config_from_state_dict(sd, filename): diffusion_model_input = sd.get('model.diffusion_model.input_blocks.0.0.weight', None) sd2_variations_weight = sd.get('embedder.model.ln_final.weight', None) - if sd.get('depth_model.model.pretrained.act_postprocess3.0.project.0.bias', None) is not None: + if sd.get('conditioner.embedders.1.model.ln_final.weight', None) is not None: + return config_sdxl + elif sd.get('depth_model.model.pretrained.act_postprocess3.0.project.0.bias', None) is not None: return config_depth_model elif sd2_variations_weight is not None and sd2_variations_weight.shape[0] == 768: return config_unclip diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py index d43b8868..e8e270c3 100644 --- a/modules/sd_models_xl.py +++ b/modules/sd_models_xl.py @@ -1,18 +1,30 @@ from __future__ import annotations +import sys + import torch import sgm.models.diffusion import sgm.modules.diffusionmodules.denoiser_scaling import sgm.modules.diffusionmodules.discretizer -from modules import devices +from modules import devices, shared, prompt_parser -def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: list[str]): +def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: prompt_parser.SdConditioning | list[str]): for embedder in self.conditioner.embedders: embedder.ucg_rate = 0.0 - c = self.conditioner({'txt': batch}) + width = getattr(self, 'target_width', 1024) + height = getattr(self, 'target_height', 1024) + + sdxl_conds = { + "txt": batch, + "original_size_as_tuple": torch.tensor([height, width]).repeat(len(batch), 1).to(devices.device, devices.dtype), + "crop_coords_top_left": 
torch.tensor([shared.opts.sdxl_crop_top, shared.opts.sdxl_crop_left]).repeat(len(batch), 1).to(devices.device, devices.dtype), + "target_size_as_tuple": torch.tensor([height, width]).repeat(len(batch), 1).to(devices.device, devices.dtype), + } + + c = self.conditioner(sdxl_conds) return c @@ -26,7 +38,7 @@ def extend_sdxl(model): model.model.diffusion_model.dtype = dtype model.model.conditioning_key = 'crossattn' - model.cond_stage_model = [x for x in model.conditioner.embedders if type(x).__name__ == 'FrozenOpenCLIPEmbedder'][0] + model.cond_stage_model = [x for x in model.conditioner.embedders if 'CLIPEmbedder' in type(x).__name__][0] model.cond_stage_key = model.cond_stage_model.input_key model.parameterization = "v" if isinstance(model.denoiser.scaling, sgm.modules.diffusionmodules.denoiser_scaling.VScaling) else "eps" @@ -34,7 +46,14 @@ def extend_sdxl(model): discretization = sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization() model.alphas_cumprod = torch.asarray(discretization.alphas_cumprod, device=devices.device, dtype=dtype) + model.is_xl = True + sgm.models.diffusion.DiffusionEngine.get_learned_conditioning = get_learned_conditioning sgm.models.diffusion.DiffusionEngine.apply_model = apply_model +sgm.modules.attention.print = lambda *args: None +sgm.modules.diffusionmodules.model.print = lambda *args: None +sgm.modules.diffusionmodules.openaimodel.print = lambda *args: None +sgm.modules.encoders.modules.print = lambda *args: None + diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 73289ce4..5552a8dc 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -186,7 +186,7 @@ class CFGDenoiser(torch.nn.Module): for batch_offset in range(0, x_out.shape[0], batch_size): a = batch_offset b = a + batch_size - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict(cond_in[a:b], image_cond_in[a:b])) + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict(subscript_cond(cond_in, a, b), image_cond_in[a:b])) else: x_out = torch.zeros_like(x_in) batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size diff --git a/modules/shared.py b/modules/shared.py index b7518de6..71afd94f 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -428,6 +428,8 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}).link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer"), "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"), "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors"), + "sdxl_crop_top": OptionInfo(0, "SDXL top coordinate of the crop"), + "sdxl_crop_left": OptionInfo(0, "SDXL left coordinate of the crop"), })) options_templates.update(options_section(('optimizations', "Optimizations"), { diff --git a/requirements.txt b/requirements.txt index 3142085e..b3f8a7f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ kornia lark numpy omegaconf +open-clip-torch piexif psutil diff --git a/requirements_versions.txt b/requirements_versions.txt index f71b9d6c..b826bf43 100644 --- a/requirements_versions.txt +++ 
b/requirements_versions.txt
@@ -15,6 +15,7 @@ kornia==0.6.7
 lark==1.1.2
 numpy==1.23.5
 omegaconf==2.2.3
+open-clip-torch==2.20.0
 piexif==1.1.3
 psutil~=5.9.5
 pytorch_lightning==1.9.4
-- cgit v1.2.3
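Note: condensed from the SDXL commit above, the sketch below shows what the hijacked DiffusionEngine.get_learned_conditioning feeds the SDXL conditioner. Shapes assume a batch of two prompts; the crop values are the new sdxl_crop_top/sdxl_crop_left defaults, the prompts are invented, and device/dtype handling is omitted.

    import torch

    batch = ["a photo of a cat", "a photo of a dog"]   # example prompts
    width = height = 1024                              # SDXL fallback target size

    sdxl_conds = {
        "txt": batch,                                  # text for both CLIP encoders
        "original_size_as_tuple": torch.tensor([height, width]).repeat(len(batch), 1),  # shape (2, 2)
        "crop_coords_top_left": torch.tensor([0, 0]).repeat(len(batch), 1),             # shape (2, 2)
        "target_size_as_tuple": torch.tensor([height, width]).repeat(len(batch), 1),    # shape (2, 2)
    }
    # self.conditioner(sdxl_conds) returns a dict of conditioning tensors;
    # the "crossattn" entry is what get_empty_cond() reads later in this log.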
""" - def __init__(self, prompts, width=None, height=None): + def __init__(self, prompts, is_negative_prompt=False, width=None, height=None, copy_from=None): super().__init__() self.extend(prompts) - self.width = width or getattr(prompts, 'width', None) - self.height = height or getattr(prompts, 'height', None) + + if copy_from is None: + copy_from = prompts + + self.is_negative_prompt = is_negative_prompt or getattr(copy_from, 'is_negative_prompt', False) + self.width = width or getattr(copy_from, 'width', None) + self.height = height or getattr(copy_from, 'height', None) + def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps): @@ -153,7 +159,7 @@ def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps): res.append(cached) continue - texts = [x[1] for x in prompt_schedule] + texts = SdConditioning([x[1] for x in prompt_schedule], copy_from=prompts) conds = model.get_learned_conditioning(texts) cond_schedule = [] diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 266811f9..647cdfbe 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -190,7 +190,7 @@ class StableDiffusionModelHijack: if typename == 'FrozenCLIPEmbedder': model_embeddings = m.cond_stage_model.transformer.text_model.embeddings model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self) - m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(embedder, self) + m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderForSDXLWithCustomWords(embedder, self) conditioner.embedders[i] = m.cond_stage_model if typename == 'FrozenOpenCLIPEmbedder2': embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self) diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py index 6c17a81d..b3771909 100644 --- a/modules/sd_hijack_clip.py +++ b/modules/sd_hijack_clip.py @@ -323,3 +323,18 @@ class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase): embedded = embedding_layer.token_embedding.wrapped(ids.to(embedding_layer.token_embedding.wrapped.weight.device)).squeeze(0) return embedded + + +class FrozenCLIPEmbedderForSDXLWithCustomWords(FrozenCLIPEmbedderWithCustomWords): + def __init__(self, wrapped, hijack): + super().__init__(wrapped, hijack) + + def encode_with_transformers(self, tokens): + outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=self.wrapped.layer == "hidden") + + if self.wrapped.layer == "last": + z = outputs.last_hidden_state + else: + z = outputs.hidden_states[self.wrapped.layer_idx] + + return z diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index 2e92479a..04c09ab0 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -12,7 +12,6 @@ sd_xl_repo_configs_path = os.path.join(paths.paths['Stable Diffusion XL'], "conf config_default = shared.sd_default_config config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml") config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml") -config_sd2v = os.path.join(sd_xl_repo_configs_path, "sd_2_1_768.yaml") config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml") config_sdxl = os.path.join(sd_xl_repo_configs_path, "sd_xl_base.yaml") config_depth_model = os.path.join(sd_repo_configs_path, "v2-midas-inference.yaml") diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py index 1dd4459f..b799ff46 100644 --- a/modules/sd_models_xl.py +++ b/modules/sd_models_xl.py @@ -22,7 +22,8 @@ def 
get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: "target_size_as_tuple": torch.tensor([height, width]).repeat(len(batch), 1).to(devices.device, devices.dtype), } - c = self.conditioner(sdxl_conds) + force_zero_negative_prompt = getattr(batch, 'is_negative_prompt', False) and all(x == '' for x in batch) + c = self.conditioner(sdxl_conds, force_zero_embeddings=['txt'] if force_zero_negative_prompt else []) return c -- cgit v1.2.3 From 471a5a66b73921d569242daccc5275cb195e3f06 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 14 Jul 2023 17:54:09 +0300 Subject: add more relevant fields to caching conds --- modules/processing.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index f01a6907..f68e010d 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -330,8 +330,21 @@ class StableDiffusionProcessing: caches is a list with items described above. """ + + cached_params = ( + required_prompts, + steps, + opts.CLIP_stop_at_last_layers, + shared.sd_model.sd_checkpoint_info, + extra_network_data, + opts.sdxl_crop_left, + opts.sdxl_crop_top, + self.width, + self.height, + ) + for cache in caches: - if cache[0] is not None and (required_prompts, steps, opts.CLIP_stop_at_last_layers, shared.sd_model.sd_checkpoint_info, extra_network_data) == cache[0]: + if cache[0] is not None and cached_params == cache[0]: return cache[1] cache = caches[0] @@ -339,7 +352,7 @@ class StableDiffusionProcessing: with devices.autocast(): cache[1] = function(shared.sd_model, required_prompts, steps) - cache[0] = (required_prompts, steps, opts.CLIP_stop_at_last_layers, shared.sd_model.sd_checkpoint_info, extra_network_data) + cache[0] = cached_params return cache[1] def setup_conds(self): -- cgit v1.2.3 From 14cf434bc36d0ef31f31d4c6cd2bd15d7857d5c8 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 15 Jul 2023 07:33:16 +0300 Subject: fix an issue in live previews that happens when you use SDXL with fp16 VAE --- modules/processing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index f68e010d..eb4a60eb 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -539,8 +539,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see def decode_first_stage(model, x): - with devices.autocast(disable=x.dtype == devices.dtype_vae): - x = model.decode_first_stage(x) + x = model.decode_first_stage(x.to(devices.dtype_vae)) return x -- cgit v1.2.3 From 2b1bae0d755c2d5201f6a6aadeadb5588208d43f Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 15 Jul 2023 08:41:22 +0300 Subject: add textual inversion hashes to infotext --- modules/processing.py | 7 ++++--- modules/sd_hijack.py | 5 ++++- modules/sd_hijack_clip.py | 15 ++++++++++++--- modules/shared.py | 1 + modules/textual_inversion/textual_inversion.py | 9 ++++++++- style.css | 4 ++++ 6 files changed, 33 insertions(+), 8 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index cd568a20..49441e77 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -732,9 +732,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: p.setup_conds() - if len(model_hijack.comments) > 0: - for comment in model_hijack.comments: - comments[comment] 
= 1 + for comment in model_hijack.comments: + comments[comment] = 1 + + p.extra_generation_params.update(model_hijack.extra_generation_params) if p.n_iter > 1: shared.state.job = f"Batch {n+1} out of {p.n_iter}" diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 3b6f95ce..6b5aae4b 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -147,7 +147,6 @@ def undo_weighted_forward(sd_model): class StableDiffusionModelHijack: fixes = None - comments = [] layers = None circular_enabled = False clip = None @@ -156,6 +155,9 @@ class StableDiffusionModelHijack: embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase() def __init__(self): + self.extra_generation_params = {} + self.comments = [] + self.embedding_db.add_embedding_dir(cmd_opts.embeddings_dir) def apply_optimizations(self, option=None): @@ -236,6 +238,7 @@ class StableDiffusionModelHijack: def clear_comments(self): self.comments = [] + self.extra_generation_params = {} def get_prompt_lengths(self, text): if self.clip is None: diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py index 3b5a7666..c1d780a3 100644 --- a/modules/sd_hijack_clip.py +++ b/modules/sd_hijack_clip.py @@ -229,9 +229,18 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module): z = self.process_tokens(tokens, multipliers) zs.append(z) - if len(used_embeddings) > 0: - embeddings_list = ", ".join([f'{name} [{embedding.checksum()}]' for name, embedding in used_embeddings.items()]) - self.hijack.comments.append(f"Used embeddings: {embeddings_list}") + if opts.textual_inversion_add_hashes_to_infotext and used_embeddings: + hashes = [] + for name, embedding in used_embeddings.items(): + shorthash = embedding.shorthash + if not shorthash: + continue + + name = name.replace(":", "").replace(",", "") + hashes.append(f"{name}: {shorthash}") + + if hashes: + self.hijack.extra_generation_params["TI hashes"] = ", ".join(hashes) return torch.hstack(zs) diff --git a/modules/shared.py b/modules/shared.py index 48478a68..a32fd4ed 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -472,6 +472,7 @@ options_templates.update(options_section(('extra_networks', "Extra Networks"), { "extra_networks_card_height": OptionInfo(0, "Card height for Extra Networks").info("in pixels"), "extra_networks_add_text_separator": OptionInfo(" ", "Extra networks separator").info("extra text to add before <...> when adding extra network to prompt"), "ui_extra_networks_tab_reorder": OptionInfo("", "Extra networks tab order").needs_restart(), + "textual_inversion_add_hashes_to_infotext": OptionInfo(True, "Add Textual Inversion hashes to infotext"), "sd_hypernetwork": OptionInfo("None", "Add hypernetwork to prompt", gr.Dropdown, lambda: {"choices": ["None", *hypernetworks]}, refresh=reload_hypernetworks), })) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index cbe975b7..38e072a8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -13,7 +13,7 @@ import numpy as np from PIL import Image, PngImagePlugin from torch.utils.tensorboard import SummaryWriter -from modules import shared, devices, sd_hijack, processing, sd_models, images, sd_samplers, sd_hijack_checkpoint, errors +from modules import shared, devices, sd_hijack, processing, sd_models, images, sd_samplers, sd_hijack_checkpoint, errors, hashes import modules.textual_inversion.dataset from modules.textual_inversion.learn_schedule import LearnRateScheduler @@ -49,6 
+49,8 @@ class Embedding: self.sd_checkpoint_name = None self.optimizer_state_dict = None self.filename = None + self.hash = None + self.shorthash = None def save(self, filename): embedding_data = { @@ -82,6 +84,10 @@ class Embedding: self.cached_checksum = f'{const_hash(self.vec.reshape(-1) * 100) & 0xffff:04x}' return self.cached_checksum + def set_hash(self, v): + self.hash = v + self.shorthash = self.hash[0:12] + class DirWithTextualInversionEmbeddings: def __init__(self, path): @@ -199,6 +205,7 @@ class EmbeddingDatabase: embedding.vectors = vec.shape[0] embedding.shape = vec.shape[-1] embedding.filename = path + embedding.set_hash(hashes.sha256(embedding.filename, "textual_inversion/" + name) or '') if self.expected_shape == -1 or self.expected_shape == embedding.shape: self.register_embedding(embedding, shared.sd_model) diff --git a/style.css b/style.css index a424067f..9e13d7fd 100644 --- a/style.css +++ b/style.css @@ -231,6 +231,10 @@ button.custom-button{ padding-top: 0.5em; } +.html-log .comments:empty{ + padding-top: 0; +} + .html-log .performance { font-size: 0.85em; color: #444; -- cgit v1.2.3 From 570f42afd122405116b39b880cdb5163fd5ca3e2 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jul 2023 12:28:50 +0300 Subject: possible fix for FP16 VAE failing in img2img SDXL --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index e7b10808..6567b3cf 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1303,7 +1303,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image = torch.from_numpy(batch_images) image = 2. * image - 1. - image = image.to(shared.device) + image = image.to(shared.device, dtype=devices.dtype_vae) self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image)) -- cgit v1.2.3 From c8b55f29e2838e67bd9e394f5dbca4350ccbb68f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 19 Jul 2023 08:27:19 +0900 Subject: missing p save_image before-highres-fix --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 6567b3cf..b89ca5c2 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1029,7 +1029,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): image = sd_samplers.sample_to_image(image, index, approximation=0) info = create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index) - images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, suffix="-before-highres-fix") + images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, p=self, suffix="-before-highres-fix") if latent_scale_mode is not None: for i in range(samples.shape[0]): -- cgit v1.2.3
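
Notes on the series above. The SdConditioning change in 594c8e7b is the load-bearing piece: prompts travel through prompt_parser as a plain list, so the commit subclasses list to carry metadata alongside the strings (width and height for SDXL, the negative-prompt flag) and threads that metadata through rebuilt lists via copy_from. A minimal, self-contained sketch of the pattern and of the force_zero_embeddings decision it enables; the demo prompts below are invented for illustration:

# Sketch of the metadata-carrying list pattern from modules/prompt_parser.py.
class SdConditioning(list):
    def __init__(self, prompts, is_negative_prompt=False, width=None, height=None, copy_from=None):
        super().__init__()
        self.extend(prompts)

        if copy_from is None:
            copy_from = prompts  # a plain list simply has none of these attributes

        self.is_negative_prompt = is_negative_prompt or getattr(copy_from, 'is_negative_prompt', False)
        self.width = width or getattr(copy_from, 'width', None)
        self.height = height or getattr(copy_from, 'height', None)

# Rebuilding the list (as get_learned_conditioning does with the schedule
# texts) preserves the flag through copy_from, which is what makes the
# force_zero_embeddings decision in sd_models_xl.py possible:
negative = SdConditioning(["", ""], is_negative_prompt=True, width=1024, height=1024)
texts = SdConditioning([x for x in negative], copy_from=negative)
force_zero = getattr(texts, 'is_negative_prompt', False) and all(x == '' for x in texts)
assert force_zero  # an all-empty negative prompt batch gets zeroed 'txt' embeddings

The getattr fallbacks are what let a plain list still work anywhere an SdConditioning is expected, which is why the flattened texts list in get_learned_conditioning could previously be a bare list comprehension.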
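
The conditioning-cache commit 471a5a66 works because the cache key now includes every field that can change the produced conds: prompts and steps as before, plus CLIP skip, the loaded checkpoint, extra-network data, SDXL crop offsets, and image size. A hedged sketch of the same single-slot keyed cache; get_cached, fake_conds and the example key fields here are stand-ins for this note, not webui API:

# Single-slot cache keyed on every input that can change the result,
# mirroring the cached_params tuple in StableDiffusionProcessing.
def get_cached(cache, function, prompts, steps, extra_fields):
    key = (tuple(prompts), steps, *extra_fields)  # e.g. CLIP skip, checkpoint, crop, width, height
    if cache[0] is not None and cache[0] == key:
        return cache[1]  # hit: skip recomputing the conds
    cache[1] = function(prompts, steps)
    cache[0] = key
    return cache[1]

def fake_conds(prompts, steps):  # stand-in for the real conditioning function
    return object()

cache = [None, None]
a = get_cached(cache, fake_conds, ["a cat"], 20, (2, "abc123", 0, 0, 512, 512))
b = get_cached(cache, fake_conds, ["a cat"], 20, (2, "abc123", 0, 0, 512, 512))
assert a is b  # identical key, cache hit
assert get_cached(cache, fake_conds, ["a cat"], 20, (2, "abc123", 0, 0, 768, 768)) is not a  # size change invalidates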
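
For 2b1bae0d, the "TI hashes" infotext value is assembled from per-embedding shorthashes, where a shorthash is the first 12 hex digits of the embedding file's sha256 (set_hash in the diff). A sketch of that flow using hashlib directly; the real modules.hashes.sha256 also memoizes results under a cache label, and the example names and hash below are made up:

import hashlib

def shorthash(path):
    # stand-in for modules.hashes.sha256(path, label)[:12]
    with open(path, "rb") as f:
        return hashlib.sha256(f.read()).hexdigest()[:12]

def ti_hashes_field(used_embeddings):
    # used_embeddings maps embedding name -> shorthash, as in sd_hijack_clip.py
    parts = []
    for name, sh in used_embeddings.items():
        if not sh:
            continue  # embeddings without a computed hash are skipped
        name = name.replace(":", "").replace(",", "")  # keep the infotext parseable
        parts.append(f"{name}: {sh}")
    return ", ".join(parts)

# e.g. ti_hashes_field({"bad-hands": "f249200f21a3", "easynegative": ""})
# -> 'bad-hands: f249200f21a3'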
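
The two fp16-VAE fixes (14cf434b for live previews, 570f42af for img2img) apply the same idea: cast the tensor to the VAE's dtype explicitly instead of relying on autocast to reconcile fp32 inputs with fp16 weights. A toy torch illustration, with dtype_vae standing in for devices.dtype_vae:

import torch

dtype_vae = torch.float16  # stand-in for devices.dtype_vae

latent = torch.randn(1, 4, 64, 64)            # fp32 latent headed for decode_first_stage
image = torch.rand(1, 3, 512, 512) * 2. - 1.  # fp32 image in [-1, 1] headed for encode_first_stage

# Conv/linear layers with fp16 weights raise a dtype mismatch on fp32 inputs,
# so both code paths now cast up front, before calling into the VAE:
latent = latent.to(dtype_vae)
image = image.to(dtype_vae)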