From 41363e0d27bbaa0c84eebe3c7c8451075390ec4e Mon Sep 17 00:00:00 2001 From: dhwz Date: Fri, 16 Jun 2023 18:10:15 +0200 Subject: fix very slow loading speed of .safetensors files --- modules/sd_models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/sd_models.py') diff --git a/modules/sd_models.py b/modules/sd_models.py index 918f6fd6..d9ac675b 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -247,8 +247,11 @@ def read_metadata_from_safetensors(filename): def read_state_dict(checkpoint_file, print_global_state=False, map_location=None): _, extension = os.path.splitext(checkpoint_file) if extension.lower() == ".safetensors": - device = map_location or shared.weight_load_location or devices.get_optimal_device_name() - pl_sd = safetensors.torch.load_file(checkpoint_file, device=device) + if not shared.opts.disable_mmap_load_safetensors: + device = map_location or shared.weight_load_location or devices.get_optimal_device_name() + pl_sd = safetensors.torch.load_file(checkpoint_file, device=device) + else: + pl_sd = safetensors.torch.load(open(checkpoint_file, 'rb').read()) else: pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location) -- cgit v1.2.3 From 24129368f1b732be25ef486edb2cf5a6ace66737 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 27 Jun 2023 09:19:04 +0300 Subject: send tensors to the correct device when loading from safetensors file with memmap disabled for #11260 --- modules/sd_models.py | 4 +++- modules/shared.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'modules/sd_models.py') diff --git a/modules/sd_models.py b/modules/sd_models.py index 0391398a..f65f4e36 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -246,11 +246,13 @@ def read_metadata_from_safetensors(filename): def read_state_dict(checkpoint_file, print_global_state=False, map_location=None): _, extension = os.path.splitext(checkpoint_file) if extension.lower() == ".safetensors": + device = map_location or shared.weight_load_location or devices.get_optimal_device_name() + if not shared.opts.disable_mmap_load_safetensors: - device = map_location or shared.weight_load_location or devices.get_optimal_device_name() pl_sd = safetensors.torch.load_file(checkpoint_file, device=device) else: pl_sd = safetensors.torch.load(open(checkpoint_file, 'rb').read()) + pl_sd = {k: v.to(device) for k, v in pl_sd.items()} else: pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location) diff --git a/modules/shared.py b/modules/shared.py index 4743a428..203ee1b9 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -376,7 +376,7 @@ options_templates.update(options_section(('system', "System"), { "multiple_tqdm": OptionInfo(True, "Add a second progress bar to the console that shows progress for an entire job."), "print_hypernet_extra": OptionInfo(False, "Print extra hypernetwork information to console."), "list_hidden_files": OptionInfo(True, "Load models/files in hidden directories").info("directory is hidden if its name starts with \".\""), - "disable_mmap_load_safetensors": OptionInfo(False, "Disable memmapping for loading .safetensors files (fixes very slow loading speed in some cases)."), + "disable_mmap_load_safetensors": OptionInfo(False, "Disable memmapping for loading .safetensors files.").info("fixes very slow loading speed in some cases"), })) options_templates.update(options_section(('training', "Training"), { -- cgit v1.2.3 From da8916f92649fc4d947cb46d9d8f8ea1621b2a59 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jul 2023 17:13:18 +0300 Subject: added torch.mps.empty_cache() to torch_gc() changed a bunch of places that use torch.cuda.empty_cache() to use torch_gc() instead --- extensions-builtin/LDSR/ldsr_model_arch.py | 8 +++----- extensions-builtin/ScuNET/scripts/scunet_model.py | 4 ++-- extensions-builtin/SwinIR/scripts/swinir_model.py | 5 +---- modules/codeformer_model.py | 2 +- modules/devices.py | 3 +++ modules/sd_models.py | 1 - 6 files changed, 10 insertions(+), 13 deletions(-) (limited to 'modules/sd_models.py') diff --git a/extensions-builtin/LDSR/ldsr_model_arch.py b/extensions-builtin/LDSR/ldsr_model_arch.py index 7f450086..7cac36ce 100644 --- a/extensions-builtin/LDSR/ldsr_model_arch.py +++ b/extensions-builtin/LDSR/ldsr_model_arch.py @@ -12,7 +12,7 @@ import safetensors.torch from ldm.models.diffusion.ddim import DDIMSampler from ldm.util import instantiate_from_config, ismap -from modules import shared, sd_hijack +from modules import shared, sd_hijack, devices cached_ldsr_model: torch.nn.Module = None @@ -112,8 +112,7 @@ class LDSR: gc.collect() - if torch.cuda.is_available: - torch.cuda.empty_cache() + devices.torch_gc() im_og = image width_og, height_og = im_og.size @@ -150,8 +149,7 @@ class LDSR: del model gc.collect() - if torch.cuda.is_available: - torch.cuda.empty_cache() + devices.torch_gc() return a diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py index ffef26b2..167d2f64 100644 --- a/extensions-builtin/ScuNET/scripts/scunet_model.py +++ b/extensions-builtin/ScuNET/scripts/scunet_model.py @@ -85,7 +85,7 @@ class UpscalerScuNET(modules.upscaler.Upscaler): def do_upscale(self, img: PIL.Image.Image, selected_file): - torch.cuda.empty_cache() + devices.torch_gc() try: model = self.load_model(selected_file) @@ -110,7 +110,7 @@ class UpscalerScuNET(modules.upscaler.Upscaler): torch_output = torch_output[:, :h * 1, :w * 1] # remove padding, if any np_output: np.ndarray = torch_output.float().cpu().clamp_(0, 1).numpy() del torch_img, torch_output - torch.cuda.empty_cache() + devices.torch_gc() output = np_output.transpose((1, 2, 0)) # CHW to HWC output = output[:, :, ::-1] # BGR to RGB diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py index c6bc53a8..c2c2a43c 100644 --- a/extensions-builtin/SwinIR/scripts/swinir_model.py +++ b/extensions-builtin/SwinIR/scripts/swinir_model.py @@ -42,10 +42,7 @@ class UpscalerSwinIR(Upscaler): return img model = model.to(device_swinir, dtype=devices.dtype) img = upscale(img, model) - try: - torch.cuda.empty_cache() - except Exception: - pass + devices.torch_gc() return img def load_model(self, path, scale=4): diff --git a/modules/codeformer_model.py b/modules/codeformer_model.py index f293acf5..da42b5e9 100644 --- a/modules/codeformer_model.py +++ b/modules/codeformer_model.py @@ -99,7 +99,7 @@ def setup_model(dirname): output = self.net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0] restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1)) del output - torch.cuda.empty_cache() + devices.torch_gc() except Exception: errors.report('Failed inference for CodeFormer', exc_info=True) restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1)) diff --git a/modules/devices.py b/modules/devices.py index 620ed1a6..c5ad950f 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -49,10 +49,13 @@ def get_device_for(task): def torch_gc(): + if torch.cuda.is_available(): with torch.cuda.device(get_cuda_device_string()): torch.cuda.empty_cache() torch.cuda.ipc_collect() + elif has_mps() and hasattr(torch.mps, 'empty_cache'): + torch.mps.empty_cache() def enable_tf32(): diff --git a/modules/sd_models.py b/modules/sd_models.py index f65f4e36..653c4cc0 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -590,7 +590,6 @@ def unload_model_weights(sd_model=None, info=None): sd_model = None gc.collect() devices.torch_gc() - torch.cuda.empty_cache() print(f"Unloaded weights {timer.summary()}.") -- cgit v1.2.3 From da468a585bb631bc91c3435f349dfb7ce7fe3895 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 3 Jul 2023 12:17:20 +0300 Subject: Fix typo: checkpoint_alisases --- modules/api/api.py | 4 ++-- modules/processing.py | 2 +- modules/sd_models.py | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'modules/sd_models.py') diff --git a/modules/api/api.py b/modules/api/api.py index 224bbfc6..5793bb44 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -22,7 +22,7 @@ from modules.textual_inversion.textual_inversion import create_embedding, train_ from modules.textual_inversion.preprocess import preprocess from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork from PIL import PngImagePlugin,Image -from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights, checkpoint_alisases +from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights, checkpoint_aliases from modules.sd_vae import vae_dict from modules.sd_models_config import find_checkpoint_config_near_filename from modules.realesrgan_model import get_realesrgan_models @@ -519,7 +519,7 @@ class Api: def set_config(self, req: Dict[str, Any]): checkpoint_name = req.get("sd_model_checkpoint", None) - if checkpoint_name is not None and checkpoint_name not in checkpoint_alisases: + if checkpoint_name is not None and checkpoint_name not in checkpoint_aliases: raise RuntimeError(f"model {checkpoint_name!r} not found") for k, v in req.items(): diff --git a/modules/processing.py b/modules/processing.py index 21d1492c..cd568a20 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -606,7 +606,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: try: # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint - if sd_models.checkpoint_alisases.get(p.override_settings.get('sd_model_checkpoint')) is None: + if sd_models.checkpoint_aliases.get(p.override_settings.get('sd_model_checkpoint')) is None: p.override_settings.pop('sd_model_checkpoint', None) sd_models.reload_model_weights() diff --git a/modules/sd_models.py b/modules/sd_models.py index 653c4cc0..060e0007 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -23,7 +23,8 @@ model_dir = "Stable-diffusion" model_path = os.path.abspath(os.path.join(paths.models_path, model_dir)) checkpoints_list = {} -checkpoint_alisases = {} +checkpoint_aliases = {} +checkpoint_alisases = checkpoint_aliases # for compatibility with old name checkpoints_loaded = collections.OrderedDict() @@ -66,7 +67,7 @@ class CheckpointInfo: def register(self): checkpoints_list[self.title] = self for id in self.ids: - checkpoint_alisases[id] = self + checkpoint_aliases[id] = self def calculate_shorthash(self): self.sha256 = hashes.sha256(self.filename, f"checkpoint/{self.name}") @@ -112,7 +113,7 @@ def checkpoint_tiles(): def list_models(): checkpoints_list.clear() - checkpoint_alisases.clear() + checkpoint_aliases.clear() cmd_ckpt = shared.cmd_opts.ckpt if shared.cmd_opts.no_download_sd_model or cmd_ckpt != shared.sd_model_file or os.path.exists(cmd_ckpt): @@ -136,7 +137,7 @@ def list_models(): def get_closet_checkpoint_match(search_string): - checkpoint_info = checkpoint_alisases.get(search_string, None) + checkpoint_info = checkpoint_aliases.get(search_string, None) if checkpoint_info is not None: return checkpoint_info @@ -166,7 +167,7 @@ def select_checkpoint(): """Raises `FileNotFoundError` if no checkpoints are found.""" model_checkpoint = shared.opts.sd_model_checkpoint - checkpoint_info = checkpoint_alisases.get(model_checkpoint, None) + checkpoint_info = checkpoint_aliases.get(model_checkpoint, None) if checkpoint_info is not None: return checkpoint_info -- cgit v1.2.3