From 7c128bbdac0da1767c239174e91af6f327845372 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Thu, 19 Oct 2023 13:56:17 +0800
Subject: Add fp8 for sd unet

---
 modules/sd_models.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 3b6cdea1..3b8ff820 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -391,6 +391,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
+        if shared.cmd_opts.opt_unet_fp8_storage:
+            model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
+            timer.record("apply fp8 unet")
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
-- 
cgit v1.2.3

From 5f9ddfa46f28ca2aa9e0bd832f6bbd67069be63e Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Thu, 19 Oct 2023 23:57:22 +0800
Subject: Add sdxl only arg

---
 modules/sd_models.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 3b8ff820..08af128f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -394,6 +394,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         if shared.cmd_opts.opt_unet_fp8_storage:
             model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
             timer.record("apply fp8 unet")
+        elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
+            model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
+            timer.record("apply fp8 unet for sdxl")
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
-- 
cgit v1.2.3
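These first two patches only change how the UNet weights are stored. As a rough standalone sketch of the idea (not webui code; it assumes torch >= 2.1, where the float8_e4m3fn dtype was introduced), casting a module to fp8 halves its memory footprint relative to fp16, but the fp8 tensors are storage only and must be upcast to a compute dtype before any math:

    import torch

    # A Linear layer stored in fp16: 2 bytes per weight.
    linear = torch.nn.Linear(320, 320).half()
    print(linear.weight.element_size())  # 2

    # Re-stored in fp8: 1 byte per weight, same shapes.
    linear.to(torch.float8_e4m3fn)
    print(linear.weight.element_size())  # 1

    # fp8 here is storage-only; upcast before computing with the weights.
    x = torch.randn(1, 320)
    y = torch.nn.functional.linear(x, linear.weight.float(), linear.bias.float())
    print(y.dtype)  # torch.float32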
From eaa9f5162fbca2ebcb2682eb861bc7e5510a2b66 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Tue, 24 Oct 2023 01:49:05 +0800
Subject: Add CPU fp8 support

Since norm layers need fp32, I only convert the linear operation layers
(Conv2d/Linear). The TE also uses some PyTorch functions that do not support
bf16 amp on CPU, so I add a condition to indicate whether the autocast is for
the unet.

---
 modules/sd_models.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 08af128f..c5fe57bf 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -391,12 +391,24 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
-        if shared.cmd_opts.opt_unet_fp8_storage:
+
+    if shared.cmd_opts.opt_unet_fp8_storage:
+        enable_fp8 = True
+    elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
+        enable_fp8 = True
+
+    if enable_fp8:
+        devices.fp8 = True
+        if devices.device == devices.cpu:
+            for module in model.model.diffusion_model.modules():
+                if isinstance(module, torch.nn.Conv2d):
+                    module.to(torch.float8_e4m3fn)
+                elif isinstance(module, torch.nn.Linear):
+                    module.to(torch.float8_e4m3fn)
+            timer.record("apply fp8 unet for cpu")
+        else:
             model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
             timer.record("apply fp8 unet")
-        elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
-            model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
-            timer.record("apply fp8 unet for sdxl")
 
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
-- 
cgit v1.2.3
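In isolation, the selective conversion described in the message above looks roughly like this (a minimal sketch, assuming any nn.Module UNet and torch >= 2.1). Norm layers are skipped because their statistics do not survive fp8's reduced precision, so only the matmul-heavy layers are stored in fp8:

    import torch

    def convert_linear_ops_to_fp8(unet: torch.nn.Module) -> None:
        """Store only Conv2d/Linear weights in fp8; norm layers keep full precision."""
        for module in unet.modules():
            if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
                module.to(torch.float8_e4m3fn)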
From 9c1eba2af3a6f9cd6282b3a367656793cbe70c01 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Tue, 24 Oct 2023 02:11:27 +0800
Subject: Fix lint

---
 modules/sd_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index c5fe57bf..44d4038b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -396,7 +396,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         enable_fp8 = True
     elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
         enable_fp8 = True
-    
+
     if enable_fp8:
         devices.fp8 = True
         if devices.device == devices.cpu:
-- 
cgit v1.2.3

From 1df6c8bfec4715610d64684b6ad2fa38c76c1df6 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Wed, 25 Oct 2023 11:36:43 +0800
Subject: fp8 for TE

---
 modules/sd_models.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 44d4038b..69395294 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -407,6 +407,13 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
                     module.to(torch.float8_e4m3fn)
             timer.record("apply fp8 unet for cpu")
         else:
+            if model.is_sdxl:
+                cond_stage = model.conditioner
+            else:
+                cond_stage = model.cond_stage_model
+            for module in cond_stage.modules():
+                if isinstance(module, torch.nn.Linear):
+                    module.to(torch.float8_e4m3fn)
             model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
             timer.record("apply fp8 unet")
 
-- 
cgit v1.2.3

From 4830b251366436ee8499c003fe87e46ddb4a4581 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Wed, 25 Oct 2023 11:53:37 +0800
Subject: Fix alphas_cumprod dtype

---
 modules/sd_models.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 69395294..23660454 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -416,6 +416,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
                     module.to(torch.float8_e4m3fn)
             model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
             timer.record("apply fp8 unet")
+            model.alphas_cumprod = model.alphas_cumprod.to(torch.float32)
 
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
-- 
cgit v1.2.3

From bf5067f50ca32cd4764638702e3cc38bca8bfd8b Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Wed, 25 Oct 2023 12:54:28 +0800
Subject: Fix alphas cumprod

---
 modules/sd_models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 23660454..7ed89a9c 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -396,6 +396,8 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         enable_fp8 = True
     elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
         enable_fp8 = True
+    else:
+        enable_fp8 = False
 
     if enable_fp8:
         devices.fp8 = True
@@ -416,7 +418,6 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
                     module.to(torch.float8_e4m3fn)
             model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
             timer.record("apply fp8 unet")
-            model.alphas_cumprod = model.alphas_cumprod.to(torch.float32)
 
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
-- 
cgit v1.2.3

From dda067f64d3289cee3ffd65767126cb30ae73b13 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Wed, 25 Oct 2023 19:53:22 +0800
Subject: ignore mps for fp8

---
 modules/sd_models.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 7ed89a9c..ccb6afd2 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -392,7 +392,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
-    if shared.cmd_opts.opt_unet_fp8_storage:
+    if devices.get_optimal_device_name() == "mps":
+        enable_fp8 = False
+    elif shared.cmd_opts.opt_unet_fp8_storage:
         enable_fp8 = True
     elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
         enable_fp8 = True
-- 
cgit v1.2.3
From d4d3134f6d2d232c7bcfa80900a362921e644976 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 28 Oct 2023 15:24:26 +0800
Subject: ManualCast for 10/16 series gpu

---
 modules/sd_models.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index ccb6afd2..31bcb913 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -403,23 +403,26 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
 
     if enable_fp8:
         devices.fp8 = True
+        if model.is_sdxl:
+            cond_stage = model.conditioner
+        else:
+            cond_stage = model.cond_stage_model
+
+        for module in cond_stage.modules():
+            if isinstance(module, torch.nn.Linear):
+                module.to(torch.float8_e4m3fn)
+
        if devices.device == devices.cpu:
             for module in model.model.diffusion_model.modules():
                 if isinstance(module, torch.nn.Conv2d):
                     module.to(torch.float8_e4m3fn)
                 elif isinstance(module, torch.nn.Linear):
                     module.to(torch.float8_e4m3fn)
-            timer.record("apply fp8 unet for cpu")
         else:
-            if model.is_sdxl:
-                cond_stage = model.conditioner
-            else:
-                cond_stage = model.cond_stage_model
-            for module in cond_stage.modules():
-                if isinstance(module, torch.nn.Linear):
-                    module.to(torch.float8_e4m3fn)
             model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
-            timer.record("apply fp8 unet")
+        timer.record("apply fp8")
+    else:
+        devices.fp8 = False
 
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
-- 
cgit v1.2.3
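The manual-cast mechanism named in the title above is implemented outside this file, so it does not appear in this diff. Sketched as a hypothetical standalone layer (the class name and the fp32 compute choice are assumptions, not webui code), the idea is that weights stay in fp8 storage while each forward pass temporarily upcasts them to a dtype the GPU handles well, for example fp32 on GTX 10/16-series cards with slow fp16:

    import torch

    class ManualCastLinear(torch.nn.Linear):
        compute_dtype = torch.float32  # a dtype the GPU runs fast and correctly

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # Upcast the fp8-stored parameters just for this call; storage stays fp8.
            weight = self.weight.to(self.compute_dtype)
            bias = self.bias.to(self.compute_dtype) if self.bias is not None else None
            return torch.nn.functional.linear(x.to(self.compute_dtype), weight, bias)

The upcast copies are temporaries, so peak memory only grows by one layer's weights at a time.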
From 598da5cd4928618b166886d3485ce30ce3a43490 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sun, 19 Nov 2023 15:50:06 +0800
Subject: Use options instead of cmd_args

---
 modules/sd_models.py | 61 ++++++++++++++++++++++++++++++++--------------------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index a6c8b2fa..eb491434 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -339,10 +339,28 @@ class SkipWritingToConfig:
         SkipWritingToConfig.skip = self.previous
 
 
+def check_fp8(model):
+    if model is None:
+        return None
+    if devices.get_optimal_device_name() == "mps":
+        enable_fp8 = False
+    elif shared.opts.fp8_storage == "Enable":
+        enable_fp8 = True
+    elif getattr(model, "is_sdxl", False) and shared.opts.fp8_storage == "Enable for SDXL":
+        enable_fp8 = True
+    else:
+        enable_fp8 = False
+    return enable_fp8
+
+
 def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer):
     sd_model_hash = checkpoint_info.calculate_shorthash()
     timer.record("calculate hash")
 
+    if not check_fp8(model) and devices.fp8:
+        # prevent model to load state dict in fp8
+        model.half()
+
     if not SkipWritingToConfig.skip:
         shared.opts.data["sd_model_checkpoint"] = checkpoint_info.title
 
@@ -395,34 +413,16 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
-    if devices.get_optimal_device_name() == "mps":
-        enable_fp8 = False
-    elif shared.cmd_opts.opt_unet_fp8_storage:
-        enable_fp8 = True
-    elif model.is_sdxl and shared.cmd_opts.opt_unet_fp8_storage_xl:
-        enable_fp8 = True
-    else:
-        enable_fp8 = False
-
-    if enable_fp8:
+    if check_fp8(model):
         devices.fp8 = True
-        if model.is_sdxl:
-            cond_stage = model.conditioner
-        else:
-            cond_stage = model.cond_stage_model
-
-        for module in cond_stage.modules():
-            if isinstance(module, torch.nn.Linear):
+        first_stage = model.first_stage_model
+        model.first_stage_model = None
+        for module in model.modules():
+            if isinstance(module, torch.nn.Conv2d):
                 module.to(torch.float8_e4m3fn)
-
-        if devices.device == devices.cpu:
-            for module in model.model.diffusion_model.modules():
-                if isinstance(module, torch.nn.Conv2d):
-                    module.to(torch.float8_e4m3fn)
-                elif isinstance(module, torch.nn.Linear):
-                    module.to(torch.float8_e4m3fn)
-        else:
-            model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
+            elif isinstance(module, torch.nn.Linear):
+                module.to(torch.float8_e4m3fn)
+        model.first_stage_model = first_stage
         timer.record("apply fp8")
     else:
         devices.fp8 = False
@@ -769,7 +769,7 @@ def reuse_model_from_already_loaded(sd_model, checkpoint_info, timer):
     return None
 
 
-def reload_model_weights(sd_model=None, info=None):
+def reload_model_weights(sd_model=None, info=None, forced_reload=False):
     checkpoint_info = info or select_checkpoint()
 
     timer = Timer()
@@ -781,11 +781,14 @@ def reload_model_weights(sd_model=None, info=None):
         current_checkpoint_info = None
     else:
         current_checkpoint_info = sd_model.sd_checkpoint_info
-        if sd_model.sd_model_checkpoint == checkpoint_info.filename:
+        if check_fp8(sd_model) != devices.fp8:
+            # load from state dict again to prevent extra numerical errors
+            forced_reload = True
+        elif sd_model.sd_model_checkpoint == checkpoint_info.filename:
             return sd_model
 
     sd_model = reuse_model_from_already_loaded(sd_model, checkpoint_info, timer)
-    if sd_model is not None and sd_model.sd_checkpoint_info.filename == checkpoint_info.filename:
+    if not forced_reload and sd_model is not None and sd_model.sd_checkpoint_info.filename == checkpoint_info.filename:
         return sd_model
 
     if sd_model is not None:
-- 
cgit v1.2.3

From 370a77f8e78e65a8a1339289d684cb43df142f70 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Tue, 21 Nov 2023 19:59:34 +0800
Subject: Option for using fp16 weight when apply lora

---
 modules/sd_models.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index eb491434..0a7777f1 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -413,14 +413,22 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
+    for module in model.modules():
+        if hasattr(module, 'fp16_weight'):
+            del module.fp16_weight
+        if hasattr(module, 'fp16_bias'):
+            del module.fp16_bias
+
     if check_fp8(model):
         devices.fp8 = True
         first_stage = model.first_stage_model
         model.first_stage_model = None
         for module in model.modules():
-            if isinstance(module, torch.nn.Conv2d):
-                module.to(torch.float8_e4m3fn)
-            elif isinstance(module, torch.nn.Linear):
+            if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
+                if shared.opts.cache_fp16_weight:
+                    module.fp16_weight = module.weight.clone().half()
+                    if module.bias is not None:
+                        module.fp16_bias = module.bias.clone().half()
                 module.to(torch.float8_e4m3fn)
         model.first_stage_model = first_stage
         timer.record("apply fp8")
-- 
cgit v1.2.3

From 40ac134c553ac824d4a96666bba14d550300daa5 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 25 Nov 2023 12:35:09 +0800
Subject: Fix pre-fp8

---
 modules/sd_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 0a7777f1..90437c87 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -357,7 +357,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
     sd_model_hash = checkpoint_info.calculate_shorthash()
     timer.record("calculate hash")
 
-    if not check_fp8(model) and devices.fp8:
+    if devices.fp8:
         # prevent model to load state dict in fp8
         model.half()
-- 
cgit v1.2.3
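The cache_fp16_weight option introduced above exists because merging a LoRA delta straight into fp8-stored weights loses most of the delta to quantization, while adding it to a cached fp16 master copy and quantizing once keeps the error to a single rounding step. A rough standalone demonstration (hypothetical values, not webui code):

    import torch

    w_fp16 = torch.randn(64, 64, dtype=torch.float16)        # cached master copy
    delta = 1e-3 * torch.randn(64, 64, dtype=torch.float16)  # e.g. a LoRA update
    w_fp8 = w_fp16.to(torch.float8_e4m3fn)                   # fp8 storage copy

    # Without the cache: dequantize fp8, add, requantize (w is rounded twice).
    no_cache = (w_fp8.to(torch.float16) + delta).to(torch.float8_e4m3fn)

    # With the cache: add to the fp16 copy, quantize once.
    cached = (w_fp16 + delta).to(torch.float8_e4m3fn)

    ref = (w_fp16 + delta).float()
    # The cached path is typically at least as accurate as the no-cache path.
    print((no_cache.float() - ref).abs().mean(), (cached.float() - ref).abs().mean())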
From b25c126ccdbc4da22ade46597a9addf808998989 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Wed, 29 Nov 2023 17:38:53 -0500
Subject: Protect alphas_cumprod from downcasting

---
 modules/sd_models.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 841402e8..de80a493 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -387,7 +387,11 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         if shared.cmd_opts.upcast_sampling and depth_model:
             model.depth_model = None
 
+        alphas_cumprod = model.alphas_cumprod
+        model.alphas_cumprod = None
         model.half()
+        model.alphas_cumprod = alphas_cumprod
+        model.alphas_cumprod_original = alphas_cumprod
         model.first_stage_model = vae
         if depth_model:
             model.depth_model = depth_model
@@ -642,6 +646,7 @@
         else:
             weight_dtype_conversion = {
                 'first_stage_model': None,
+                'alphas_cumprod': None,
                 '': torch.float16,
             }
-- 
cgit v1.2.3

From 50a21cb09fe3e9ea2d4fe058e0484e192c8a86e3 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 2 Dec 2023 22:06:47 +0800
Subject: Ensure the cached weight will not be affected

---
 modules/sd_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 4b8a9ae6..dcf816b3 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -435,9 +435,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         for module in model.modules():
             if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
                 if shared.opts.cache_fp16_weight:
-                    module.fp16_weight = module.weight.clone().half()
+                    module.fp16_weight = module.weight.data.clone().cpu().half()
                     if module.bias is not None:
-                        module.fp16_bias = module.bias.clone().half()
+                        module.fp16_bias = module.bias.data.clone().cpu().half()
                 module.to(torch.float8_e4m3fn)
         model.first_stage_model = first_stage
         timer.record("apply fp8")
-- 
cgit v1.2.3

From dc1adeecdd02f3fb910481e808a6d60a77100fea Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sat, 2 Dec 2023 14:06:56 -0500
Subject: Create alphas_cumprod_original on full precision path

---
 modules/sd_models.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index de80a493..976c7d5b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -374,6 +374,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
 
     if shared.cmd_opts.no_half:
         model.float()
+        model.alphas_cumprod_original = alphas_cumprod
         devices.dtype_unet = torch.float32
         timer.record("apply float()")
     else:
-- 
cgit v1.2.3

From 78acdcf677a96894651ff0d7d8287f2a994f3781 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sat, 2 Dec 2023 14:09:18 -0500
Subject: fix variable

---
 modules/sd_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 976c7d5b..5a19a00a 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -374,7 +374,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
 
     if shared.cmd_opts.no_half:
         model.float()
-        model.alphas_cumprod_original = alphas_cumprod
+        model.alphas_cumprod_original = model.alphas_cumprod
         devices.dtype_unet = torch.float32
         timer.record("apply float()")
     else:
-- 
cgit v1.2.3
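The trick in these four patches, reduced to a toy module (a sketch; in webui, alphas_cumprod is a buffer on the LatentDiffusion model): Module.half() downcasts every floating-point parameter and buffer it can see, so a buffer that must stay fp32 is detached before the cast and reattached afterwards, with a pristine copy kept for later schedule overrides:

    import torch

    class Toy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 4)
            self.register_buffer('alphas_cumprod', torch.linspace(1.0, 1e-4, 1000))

    model = Toy()
    alphas_cumprod = model.alphas_cumprod
    model.alphas_cumprod = None                     # hide the buffer from half()
    model.half()
    model.alphas_cumprod = alphas_cumprod           # reattach, still float32
    model.alphas_cumprod_original = alphas_cumprod  # pristine copy for overrides
    print(model.linear.weight.dtype, model.alphas_cumprod.dtype)  # float16 float32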
From 672dc4efa8e0da38426b121e7c7216d0a8e465fd Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Wed, 6 Dec 2023 15:16:10 +0800
Subject: Fix forced reload

---
 modules/sd_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index dcf816b3..d0046f88 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -801,7 +801,7 @@ def reload_model_weights(sd_model=None, info=None, forced_reload=False):
         if check_fp8(sd_model) != devices.fp8:
             # load from state dict again to prevent extra numerical errors
             forced_reload = True
-        elif sd_model.sd_model_checkpoint == checkpoint_info.filename:
+        elif sd_model.sd_model_checkpoint == checkpoint_info.filename and not forced_reload:
             return sd_model
 
     sd_model = reuse_model_from_already_loaded(sd_model, checkpoint_info, timer)
-- 
cgit v1.2.3

From a183de04e3f965083e7f3462201327d30c36b958 Mon Sep 17 00:00:00 2001
From: Nuullll
Date: Sat, 6 Jan 2024 20:03:33 +0800
Subject: Execute model_loaded_callback after moving to target device

---
 modules/sd_models.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 50bc209e..2c045771 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -842,13 +842,13 @@ def reload_model_weights(sd_model=None, info=None, forced_reload=False):
         sd_hijack.model_hijack.hijack(sd_model)
         timer.record("hijack")
 
-        script_callbacks.model_loaded_callback(sd_model)
-        timer.record("script callbacks")
-
         if not sd_model.lowvram:
             sd_model.to(devices.device)
             timer.record("move model to device")
 
+        script_callbacks.model_loaded_callback(sd_model)
+        timer.record("script callbacks")
+
     print(f"Weights loaded in {timer.summary()}.")
 
     model_data.set_sd_model(sd_model)
-- 
cgit v1.2.3
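The reorder above matters because model_loaded_callback handlers frequently assume the model already sits on its target device. A hypothetical extension callback that relies on this (on_model_loaded is webui's registration API; the body here is made up):

    from modules import script_callbacks

    def handle_model_loaded(sd_model):
        # Safe only because the callback now fires after sd_model.to(devices.device).
        device = next(sd_model.parameters()).device
        print(f"model ready on {device}")

    script_callbacks.on_model_loaded(handle_model_loaded)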
From 28bc85a20a282285710e17c4d86cf9db5e00d7db Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 2 Mar 2024 06:40:32 +0300
Subject: Merge pull request #14979 from drhead/refiner_cumprod_fix

Protect alphas_cumprod during refiner switchover

---
 modules/sd_models.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 2c045771..fbd53adb 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -15,6 +15,7 @@
 from ldm.util import instantiate_from_config
 
 from modules import paths, shared, modelloader, devices, script_callbacks, sd_vae, sd_disable_initialization, errors, hashes, sd_models_config, sd_unet, sd_models_xl, cache, extra_networks, processing, lowvram, sd_hijack, patches
 from modules.timer import Timer
+from modules.shared import opts
 import tomesd
 import numpy as np
@@ -549,6 +550,36 @@ def repair_config(sd_config):
         karlo_path = os.path.join(paths.models_path, 'karlo')
         sd_config.model.params.noise_aug_config.params.clip_stats_path = sd_config.model.params.noise_aug_config.params.clip_stats_path.replace("checkpoints/karlo_models", karlo_path)
 
+def apply_alpha_schedule_override(sd_model, p=None):
+    def rescale_zero_terminal_snr_abar(alphas_cumprod):
+        alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+        # Store old values.
+        alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+        alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+        # Shift so the last timestep is zero.
+        alphas_bar_sqrt -= (alphas_bar_sqrt_T)
+
+        # Scale so the first timestep is back to the old value.
+        alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+        # Convert alphas_bar_sqrt to betas
+        alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
+        alphas_bar[-1] = 4.8973451890853435e-08
+        return alphas_bar
+
+    if hasattr(sd_model, 'alphas_cumprod') and hasattr(sd_model, 'alphas_cumprod_original'):
+        sd_model.alphas_cumprod = sd_model.alphas_cumprod_original.to(shared.device)
+
+        if opts.use_downcasted_alpha_bar:
+            if p is not None:
+                p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
+            sd_model.alphas_cumprod = sd_model.alphas_cumprod.half().to(shared.device)
+        if opts.sd_noise_schedule == "Zero Terminal SNR":
+            if p is not None:
+                p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
+            sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(sd_model.alphas_cumprod).to(shared.device)
 
 sd1_clip_weight = 'cond_stage_model.transformer.text_model.embeddings.token_embedding.weight'
 sd2_clip_weight = 'cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight'
@@ -812,6 +843,7 @@ def reload_model_weights(sd_model=None, info=None, forced_reload=False):
 
     sd_model = reuse_model_from_already_loaded(sd_model, checkpoint_info, timer)
     if not forced_reload and sd_model is not None and sd_model.sd_checkpoint_info.filename == checkpoint_info.filename:
+        apply_alpha_schedule_override(sd_model)
         return sd_model
 
     if sd_model is not None:
-- 
cgit v1.2.3

From da67afe5f68497a04d1fd9173bbd256b73d9d251 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 2 Mar 2024 06:53:53 +0300
Subject: call apply_alpha_schedule_override in load_model_weights for #14979

---
 modules/sd_models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index fbd53adb..db72e120 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -428,6 +428,8 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
+    apply_alpha_schedule_override(model)
+
     for module in model.modules():
         if hasattr(module, 'fp16_weight'):
             del module.fp16_weight
@@ -843,7 +845,6 @@ def reload_model_weights(sd_model=None, info=None, forced_reload=False):
 
     sd_model = reuse_model_from_already_loaded(sd_model, checkpoint_info, timer)
     if not forced_reload and sd_model is not None and sd_model.sd_checkpoint_info.filename == checkpoint_info.filename:
-        apply_alpha_schedule_override(sd_model)
         return sd_model
 
     if sd_model is not None:
-- 
cgit v1.2.3
From 141a17e9693065c33a2b1d30f04a0083bb687775 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 2 Mar 2024 06:54:11 +0300
Subject: style changes for #14979

---
 modules/sd_models.py | 70 ++++++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 29 deletions(-)

(limited to 'modules/sd_models.py')

diff --git a/modules/sd_models.py b/modules/sd_models.py
index db72e120..747fc39e 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -552,36 +552,48 @@ def repair_config(sd_config):
         karlo_path = os.path.join(paths.models_path, 'karlo')
         sd_config.model.params.noise_aug_config.params.clip_stats_path = sd_config.model.params.noise_aug_config.params.clip_stats_path.replace("checkpoints/karlo_models", karlo_path)
 
+
+def rescale_zero_terminal_snr_abar(alphas_cumprod):
+    alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+    # Store old values.
+    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+    # Shift so the last timestep is zero.
+    alphas_bar_sqrt -= (alphas_bar_sqrt_T)
+
+    # Scale so the first timestep is back to the old value.
+    alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+    # Convert alphas_bar_sqrt to betas
+    alphas_bar = alphas_bar_sqrt ** 2  # Revert sqrt
+    alphas_bar[-1] = 4.8973451890853435e-08
+    return alphas_bar
+
+
 def apply_alpha_schedule_override(sd_model, p=None):
-    def rescale_zero_terminal_snr_abar(alphas_cumprod):
-        alphas_bar_sqrt = alphas_cumprod.sqrt()
-
-        # Store old values.
-        alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
-        alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
-
-        # Shift so the last timestep is zero.
-        alphas_bar_sqrt -= (alphas_bar_sqrt_T)
-
-        # Scale so the first timestep is back to the old value.
-        alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
-
-        # Convert alphas_bar_sqrt to betas
-        alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
-        alphas_bar[-1] = 4.8973451890853435e-08
-        return alphas_bar
-
-    if hasattr(sd_model, 'alphas_cumprod') and hasattr(sd_model, 'alphas_cumprod_original'):
-        sd_model.alphas_cumprod = sd_model.alphas_cumprod_original.to(shared.device)
-
-        if opts.use_downcasted_alpha_bar:
-            if p is not None:
-                p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
-            sd_model.alphas_cumprod = sd_model.alphas_cumprod.half().to(shared.device)
-        if opts.sd_noise_schedule == "Zero Terminal SNR":
-            if p is not None:
-                p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
-            sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(sd_model.alphas_cumprod).to(shared.device)
+    """
+    Applies an override to the alpha schedule of the model according to settings.
+    - downcasts the alpha schedule to half precision
+    - rescales the alpha schedule to have zero terminal SNR
+    """
+
+    if not hasattr(sd_model, 'alphas_cumprod') or not hasattr(sd_model, 'alphas_cumprod_original'):
+        return
+
+    sd_model.alphas_cumprod = sd_model.alphas_cumprod_original.to(shared.device)
+
+    if opts.use_downcasted_alpha_bar:
+        if p is not None:
+            p.extra_generation_params['Downcast alphas_cumprod'] = opts.use_downcasted_alpha_bar
+        sd_model.alphas_cumprod = sd_model.alphas_cumprod.half().to(shared.device)
+
+    if opts.sd_noise_schedule == "Zero Terminal SNR":
+        if p is not None:
+            p.extra_generation_params['Noise Schedule'] = opts.sd_noise_schedule
+        sd_model.alphas_cumprod = rescale_zero_terminal_snr_abar(sd_model.alphas_cumprod).to(shared.device)
+
 
 sd1_clip_weight = 'cond_stage_model.transformer.text_model.embeddings.token_embedding.weight'
 sd2_clip_weight = 'cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight'
-- 
cgit v1.2.3
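As a quick sanity check of the rescale left in place by the final patch (a standalone sketch with a toy linear beta schedule, assuming rescale_zero_terminal_snr_abar from the patch above is in scope): the first alpha-bar value is preserved while the terminal value is pinned to approximately zero, giving zero terminal SNR:

    import torch

    betas = torch.linspace(0.00085, 0.012, 1000)  # toy beta schedule
    abar = torch.cumprod(1.0 - betas, dim=0)

    rescaled = rescale_zero_terminal_snr_abar(abar)
    print(abar[0].item(), rescaled[0].item())    # first timestep unchanged
    print(abar[-1].item(), rescaled[-1].item())  # terminal value forced to ~4.9e-08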