From 41363e0d27bbaa0c84eebe3c7c8451075390ec4e Mon Sep 17 00:00:00 2001
From: dhwz <dhwz@gmx.net>
Date: Fri, 16 Jun 2023 18:10:15 +0200
Subject: fix very slow loading speed of .safetensors files

---
 modules/sd_models.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 918f6fd6..d9ac675b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -247,8 +247,11 @@ def read_metadata_from_safetensors(filename):
 def read_state_dict(checkpoint_file, print_global_state=False, map_location=None):
     _, extension = os.path.splitext(checkpoint_file)
     if extension.lower() == ".safetensors":
-        device = map_location or shared.weight_load_location or devices.get_optimal_device_name()
-        pl_sd = safetensors.torch.load_file(checkpoint_file, device=device)
+        if not shared.opts.disable_mmap_load_safetensors:
+            device = map_location or shared.weight_load_location or devices.get_optimal_device_name()
+            pl_sd = safetensors.torch.load_file(checkpoint_file, device=device)
+        else:
+            pl_sd = safetensors.torch.load(open(checkpoint_file, 'rb').read())
     else:
         pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location)
 
-- 
cgit v1.2.3


From 24129368f1b732be25ef486edb2cf5a6ace66737 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 27 Jun 2023 09:19:04 +0300
Subject: send tensors to the correct device when loading from safetensors file
 with memmap disabled for #11260

---
 modules/sd_models.py | 4 +++-
 modules/shared.py    | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 0391398a..f65f4e36 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -246,11 +246,13 @@ def read_metadata_from_safetensors(filename):
 def read_state_dict(checkpoint_file, print_global_state=False, map_location=None):
     _, extension = os.path.splitext(checkpoint_file)
     if extension.lower() == ".safetensors":
+        device = map_location or shared.weight_load_location or devices.get_optimal_device_name()
+
         if not shared.opts.disable_mmap_load_safetensors:
-            device = map_location or shared.weight_load_location or devices.get_optimal_device_name()
             pl_sd = safetensors.torch.load_file(checkpoint_file, device=device)
         else:
             pl_sd = safetensors.torch.load(open(checkpoint_file, 'rb').read())
+            pl_sd = {k: v.to(device) for k, v in pl_sd.items()}
     else:
         pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location)
 
diff --git a/modules/shared.py b/modules/shared.py
index 4743a428..203ee1b9 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -376,7 +376,7 @@ options_templates.update(options_section(('system', "System"), {
     "multiple_tqdm": OptionInfo(True, "Add a second progress bar to the console that shows progress for an entire job."),
     "print_hypernet_extra": OptionInfo(False, "Print extra hypernetwork information to console."),
     "list_hidden_files": OptionInfo(True, "Load models/files in hidden directories").info("directory is hidden if its name starts with \".\""),
-    "disable_mmap_load_safetensors": OptionInfo(False, "Disable memmapping for loading .safetensors files (fixes very slow loading speed in some cases)."),
+    "disable_mmap_load_safetensors": OptionInfo(False, "Disable memmapping for loading .safetensors files.").info("fixes very slow loading speed in some cases"),
 }))
 
 options_templates.update(options_section(('training', "Training"), {
-- 
cgit v1.2.3


From da8916f92649fc4d947cb46d9d8f8ea1621b2a59 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 8 Jul 2023 17:13:18 +0300
Subject: added torch.mps.empty_cache() to torch_gc(); changed a bunch of places
 that use torch.cuda.empty_cache() to use torch_gc() instead

---
 extensions-builtin/LDSR/ldsr_model_arch.py        | 8 +++-----
 extensions-builtin/ScuNET/scripts/scunet_model.py | 4 ++--
 extensions-builtin/SwinIR/scripts/swinir_model.py | 5 +----
 modules/codeformer_model.py                       | 2 +-
 modules/devices.py                                | 3 +++
 modules/sd_models.py                              | 1 -
 6 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/extensions-builtin/LDSR/ldsr_model_arch.py b/extensions-builtin/LDSR/ldsr_model_arch.py
index 7f450086..7cac36ce 100644
--- a/extensions-builtin/LDSR/ldsr_model_arch.py
+++ b/extensions-builtin/LDSR/ldsr_model_arch.py
@@ -12,7 +12,7 @@ import safetensors.torch
 
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.util import instantiate_from_config, ismap
-from modules import shared, sd_hijack
+from modules import shared, sd_hijack, devices
 
 cached_ldsr_model: torch.nn.Module = None
 
@@ -112,8 +112,7 @@ class LDSR:
 
 
         gc.collect()
-        if torch.cuda.is_available:
-            torch.cuda.empty_cache()
+        devices.torch_gc()
 
         im_og = image
         width_og, height_og = im_og.size
@@ -150,8 +149,7 @@ class LDSR:
 
         del model
         gc.collect()
-        if torch.cuda.is_available:
-            torch.cuda.empty_cache()
+        devices.torch_gc()
 
         return a
 
diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py
index ffef26b2..167d2f64 100644
--- a/extensions-builtin/ScuNET/scripts/scunet_model.py
+++ b/extensions-builtin/ScuNET/scripts/scunet_model.py
@@ -85,7 +85,7 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
 
     def do_upscale(self, img: PIL.Image.Image, selected_file):
 
-        torch.cuda.empty_cache()
+        devices.torch_gc()
 
         try:
             model = self.load_model(selected_file)
@@ -110,7 +110,7 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
         torch_output = torch_output[:, :h * 1, :w * 1] # remove padding, if any
         np_output: np.ndarray = torch_output.float().cpu().clamp_(0, 1).numpy()
         del torch_img, torch_output
-        torch.cuda.empty_cache()
+        devices.torch_gc()
 
         output = np_output.transpose((1, 2, 0))  # CHW to HWC
         output = output[:, :, ::-1]  # BGR to RGB
diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py
index c6bc53a8..c2c2a43c 100644
--- a/extensions-builtin/SwinIR/scripts/swinir_model.py
+++ b/extensions-builtin/SwinIR/scripts/swinir_model.py
@@ -42,10 +42,7 @@ class UpscalerSwinIR(Upscaler):
             return img
         model = model.to(device_swinir, dtype=devices.dtype)
         img = upscale(img, model)
-        try:
-            torch.cuda.empty_cache()
-        except Exception:
-            pass
+        devices.torch_gc()
         return img
 
     def load_model(self, path, scale=4):
diff --git a/modules/codeformer_model.py b/modules/codeformer_model.py
index f293acf5..da42b5e9 100644
--- a/modules/codeformer_model.py
+++ b/modules/codeformer_model.py
@@ -99,7 +99,7 @@ def setup_model(dirname):
                             output = self.net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0]
                             restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
                         del output
-                        torch.cuda.empty_cache()
+                        devices.torch_gc()
                     except Exception:
                         errors.report('Failed inference for CodeFormer', exc_info=True)
                         restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
diff --git a/modules/devices.py b/modules/devices.py
index 620ed1a6..c5ad950f 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -49,10 +49,13 @@ def get_device_for(task):
 
 
 def torch_gc():
+
     if torch.cuda.is_available():
         with torch.cuda.device(get_cuda_device_string()):
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
+    elif has_mps() and hasattr(torch.mps, 'empty_cache'):
+        torch.mps.empty_cache()
 
 
 def enable_tf32():
diff --git a/modules/sd_models.py b/modules/sd_models.py
index f65f4e36..653c4cc0 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -590,7 +590,6 @@ def unload_model_weights(sd_model=None, info=None):
         sd_model = None
         gc.collect()
         devices.torch_gc()
-        torch.cuda.empty_cache()
 
     print(f"Unloaded weights {timer.summary()}.")
 
-- 
cgit v1.2.3


From da468a585bb631bc91c3435f349dfb7ce7fe3895 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Mon, 3 Jul 2023 12:17:20 +0300
Subject: Fix typo: checkpoint_alisases

---
 modules/api/api.py    |  4 ++--
 modules/processing.py |  2 +-
 modules/sd_models.py  | 11 ++++++-----
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/api/api.py b/modules/api/api.py
index 224bbfc6..5793bb44 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -22,7 +22,7 @@ from modules.textual_inversion.textual_inversion import create_embedding, train_
 from modules.textual_inversion.preprocess import preprocess
 from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
 from PIL import PngImagePlugin,Image
-from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights, checkpoint_alisases
+from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights, checkpoint_aliases
 from modules.sd_vae import vae_dict
 from modules.sd_models_config import find_checkpoint_config_near_filename
 from modules.realesrgan_model import get_realesrgan_models
@@ -519,7 +519,7 @@ class Api:
 
     def set_config(self, req: Dict[str, Any]):
         checkpoint_name = req.get("sd_model_checkpoint", None)
-        if checkpoint_name is not None and checkpoint_name not in checkpoint_alisases:
+        if checkpoint_name is not None and checkpoint_name not in checkpoint_aliases:
             raise RuntimeError(f"model {checkpoint_name!r} not found")
 
         for k, v in req.items():
diff --git a/modules/processing.py b/modules/processing.py
index 21d1492c..cd568a20 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -606,7 +606,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
     try:
         # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint
-        if sd_models.checkpoint_alisases.get(p.override_settings.get('sd_model_checkpoint')) is None:
+        if sd_models.checkpoint_aliases.get(p.override_settings.get('sd_model_checkpoint')) is None:
             p.override_settings.pop('sd_model_checkpoint', None)
             sd_models.reload_model_weights()
 
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 653c4cc0..060e0007 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -23,7 +23,8 @@ model_dir = "Stable-diffusion"
 model_path = os.path.abspath(os.path.join(paths.models_path, model_dir))
 
 checkpoints_list = {}
-checkpoint_alisases = {}
+checkpoint_aliases = {}
+checkpoint_alisases = checkpoint_aliases  # for compatibility with old name
 checkpoints_loaded = collections.OrderedDict()
 
 
@@ -66,7 +67,7 @@ class CheckpointInfo:
     def register(self):
         checkpoints_list[self.title] = self
         for id in self.ids:
-            checkpoint_alisases[id] = self
+            checkpoint_aliases[id] = self
 
     def calculate_shorthash(self):
         self.sha256 = hashes.sha256(self.filename, f"checkpoint/{self.name}")
@@ -112,7 +113,7 @@ def checkpoint_tiles():
 
 def list_models():
     checkpoints_list.clear()
-    checkpoint_alisases.clear()
+    checkpoint_aliases.clear()
 
     cmd_ckpt = shared.cmd_opts.ckpt
     if shared.cmd_opts.no_download_sd_model or cmd_ckpt != shared.sd_model_file or os.path.exists(cmd_ckpt):
@@ -136,7 +137,7 @@ def list_models():
 
 
 def get_closet_checkpoint_match(search_string):
-    checkpoint_info = checkpoint_alisases.get(search_string, None)
+    checkpoint_info = checkpoint_aliases.get(search_string, None)
     if checkpoint_info is not None:
         return checkpoint_info
 
@@ -166,7 +167,7 @@ def select_checkpoint():
     """Raises `FileNotFoundError` if no checkpoints are found."""
     model_checkpoint = shared.opts.sd_model_checkpoint
 
-    checkpoint_info = checkpoint_alisases.get(model_checkpoint, None)
+    checkpoint_info = checkpoint_aliases.get(model_checkpoint, None)
     if checkpoint_info is not None:
         return checkpoint_info
 
-- 
cgit v1.2.3