From aab385d01b4311726127397552d791f4d71b7147 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 3 Sep 2023 11:56:02 +0900 Subject: thread safe extra network list_items --- extensions-builtin/Lora/ui_extra_networks_lora.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index 55409a78..e9f30062 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -66,11 +66,11 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): return item def list_items(self): - for index, name in enumerate(networks.available_networks): - item = self.create_item(name, index) - - if item is not None: - yield item + with self.thread_lock: + for index, name in enumerate(networks.available_networks): + item = self.create_item(name, index) + if item is not None: + yield item def allowed_directories_for_previews(self): return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir_backcompat] -- cgit v1.2.3 From 25de9a785cc9e93c16626db6ab5b16824443de53 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 9 Sep 2023 16:56:19 +0900 Subject: Revert "thread safe extra network list_items" This reverts commit aab385d01b4311726127397552d791f4d71b7147. --- extensions-builtin/Lora/ui_extra_networks_lora.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index e9f30062..55409a78 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -66,11 +66,11 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): return item def list_items(self): - with self.thread_lock: - for index, name in enumerate(networks.available_networks): - item = self.create_item(name, index) - if item is not None: - yield item + for index, name in enumerate(networks.available_networks): + item = self.create_item(name, index) + + if item is not None: + yield item def allowed_directories_for_previews(self): return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir_backcompat] -- cgit v1.2.3 From f5959c1c3022c454de22fab749d0f06ab3219868 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 9 Sep 2023 17:05:50 +0900 Subject: thread safe extra network using list --- extensions-builtin/Lora/ui_extra_networks_lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index 55409a78..e74daa77 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -66,7 +66,8 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): return item def list_items(self): - for index, name in enumerate(networks.available_networks): + names = list(networks.available_networks) + for index, name in enumerate(names): item = self.create_item(name, index) if item is not None: -- cgit v1.2.3 From e785402b6acca12108e15224ff80d58817ab3c27 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 9 Sep 2023 17:28:06 +0900 Subject: return nothing if not found --- 
extensions-builtin/Lora/ui_extra_networks_lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index e74daa77..dac90a86 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -17,6 +17,8 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): def create_item(self, name, index=None, enable_filter=True): lora_on_disk = networks.available_networks.get(name) + if lora_on_disk is None: + return path, ext = os.path.splitext(lora_on_disk.filename) @@ -69,7 +71,6 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): names = list(networks.available_networks) for index, name in enumerate(names): item = self.create_item(name, index) - if item is not None: yield item -- cgit v1.2.3 From 74b80e72115af46bf1c04167a30f9ec5025cb464 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 12 Sep 2023 09:29:07 +0900 Subject: add comment --- extensions-builtin/Lora/ui_extra_networks_lora.py | 1 + 1 file changed, 1 insertion(+) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index dac90a86..df02c663 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -68,6 +68,7 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): return item def list_items(self): + # instantiate a list to protect against concurrent modification names = list(networks.available_networks) for index, name in enumerate(names): item = self.create_item(name, index) -- cgit v1.2.3 From ec718f76b58b183859ed732e11ec748c41a13f76 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Tue, 17 Oct 2023 23:35:50 -0700 Subject: wip incorrect OFT implementation --- extensions-builtin/Lora/network_oft.py | 82 ++++++++++++++++++++++++++++++++++ extensions-builtin/Lora/networks.py | 5 +++ 2 files changed, 87 insertions(+) create mode 100644 extensions-builtin/Lora/network_oft.py (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py new file mode 100644 index 00000000..9ddb175c --- /dev/null +++ b/extensions-builtin/Lora/network_oft.py @@ -0,0 +1,82 @@ +import torch +import network + + +class ModuleTypeOFT(network.ModuleType): + def create_module(self, net: network.Network, weights: network.NetworkWeights): + if all(x in weights.w for x in ["oft_blocks"]): + return NetworkModuleOFT(net, weights) + + return None + +# adapted from https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py +class NetworkModuleOFT(network.NetworkModule): + def __init__(self, net: network.Network, weights: network.NetworkWeights): + super().__init__(net, weights) + + self.oft_blocks = weights.w["oft_blocks"] + self.alpha = weights.w["alpha"] + + self.dim = self.oft_blocks.shape[0] + self.num_blocks = self.dim + + #if type(self.alpha) == torch.Tensor: + # self.alpha = self.alpha.detach().numpy() + + if "Linear" in self.sd_module.__class__.__name__: + self.out_dim = self.sd_module.out_features + elif "Conv" in self.sd_module.__class__.__name__: + self.out_dim = self.sd_module.out_channels + + self.constraint = self.alpha * self.out_dim + self.block_size = self.out_dim // self.num_blocks + + self.oft_multiplier = 
self.multiplier() + + # replace forward method of original linear rather than replacing the module + # self.org_forward = self.sd_module.forward + # self.sd_module.forward = self.forward + + def get_weight(self): + block_Q = self.oft_blocks - self.oft_blocks.transpose(1, 2) + norm_Q = torch.norm(block_Q.flatten()) + new_norm_Q = torch.clamp(norm_Q, max=self.constraint) + block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) + I = torch.eye(self.block_size, device=self.oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + block_R = torch.matmul(I + block_Q, (I - block_Q).inverse()) + + block_R_weighted = self.oft_multiplier * block_R + (1 - self.oft_multiplier) * I + R = torch.block_diag(*block_R_weighted) + + return R + + def calc_updown(self, orig_weight): + oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + block_Q = oft_blocks - oft_blocks.transpose(1, 2) + norm_Q = torch.norm(block_Q.flatten()) + new_norm_Q = torch.clamp(norm_Q, max=self.constraint) + block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) + I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + block_R = torch.matmul(I + block_Q, (I - block_Q).inverse()) + + block_R_weighted = self.oft_multiplier * block_R + (1 - self.oft_multiplier) * I + R = torch.block_diag(*block_R_weighted) + #R = self.get_weight().to(orig_weight.device, dtype=orig_weight.dtype) + # W = R*W_0 + updown = orig_weight + R + output_shape = [R.size(0), orig_weight.size(1)] + return self.finalize_updown(updown, orig_weight, output_shape) + + # def forward(self, x, y=None): + # x = self.org_forward(x) + # if self.oft_multiplier == 0.0: + # return x + + # R = self.get_weight().to(x.device, dtype=x.dtype) + # if x.dim() == 4: + # x = x.permute(0, 2, 3, 1) + # x = torch.matmul(x, R) + # x = x.permute(0, 3, 1, 2) + # else: + # x = torch.matmul(x, R) + # return x diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 60d8dec4..bd1f1b75 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -11,6 +11,7 @@ import network_ia3 import network_lokr import network_full import network_norm +import network_oft import torch from typing import Union @@ -28,6 +29,7 @@ module_types = [ network_full.ModuleTypeFull(), network_norm.ModuleTypeNorm(), network_glora.ModuleTypeGLora(), + network_oft.ModuleTypeOFT(), ] @@ -183,6 +185,9 @@ def load_network(name, network_on_disk): elif sd_module is None and "lora_te1_text_model" in key_network_without_network_parts: key = key_network_without_network_parts.replace("lora_te1_text_model", "0_transformer_text_model") sd_module = shared.sd_model.network_layer_mapping.get(key, None) + elif sd_module is None and "oft_unet" in key_network_without_network_parts: + key = key_network_without_network_parts.replace("oft_unet", "diffusion_model") + sd_module = shared.sd_model.network_layer_mapping.get(key, None) # some SD1 Loras also have correct compvis keys if sd_module is None: -- cgit v1.2.3 From 1c6efdbba774d603c592debaccd6f5ad827bd1b2 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Wed, 18 Oct 2023 04:16:01 -0700 Subject: inference working but SLOW --- extensions-builtin/Lora/network_oft.py | 73 +++++++++++++++++----------------- extensions-builtin/Lora/networks.py | 42 +++++++++++++++++-- 2 files changed, 75 insertions(+), 40 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py 
b/extensions-builtin/Lora/network_oft.py index 9ddb175c..f085eca5 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -12,6 +12,7 @@ class ModuleTypeOFT(network.ModuleType): # adapted from https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py class NetworkModuleOFT(network.NetworkModule): def __init__(self, net: network.Network, weights: network.NetworkWeights): + super().__init__(net, weights) self.oft_blocks = weights.w["oft_blocks"] @@ -20,24 +21,29 @@ class NetworkModuleOFT(network.NetworkModule): self.dim = self.oft_blocks.shape[0] self.num_blocks = self.dim - #if type(self.alpha) == torch.Tensor: - # self.alpha = self.alpha.detach().numpy() - if "Linear" in self.sd_module.__class__.__name__: self.out_dim = self.sd_module.out_features elif "Conv" in self.sd_module.__class__.__name__: self.out_dim = self.sd_module.out_channels - self.constraint = self.alpha * self.out_dim + self.constraint = self.alpha + #self.constraint = self.alpha * self.out_dim self.block_size = self.out_dim // self.num_blocks - self.oft_multiplier = self.multiplier() + self.org_module: list[torch.Module] = [self.sd_module] + + self.R = self.get_weight() - # replace forward method of original linear rather than replacing the module - # self.org_forward = self.sd_module.forward - # self.sd_module.forward = self.forward + self.apply_to() + + # replace forward method of original linear rather than replacing the module + def apply_to(self): + self.org_forward = self.org_module[0].forward + self.org_module[0].forward = self.forward - def get_weight(self): + def get_weight(self, multiplier=None): + if not multiplier: + multiplier = self.multiplier() block_Q = self.oft_blocks - self.oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) new_norm_Q = torch.clamp(norm_Q, max=self.constraint) @@ -45,38 +51,31 @@ class NetworkModuleOFT(network.NetworkModule): I = torch.eye(self.block_size, device=self.oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) block_R = torch.matmul(I + block_Q, (I - block_Q).inverse()) - block_R_weighted = self.oft_multiplier * block_R + (1 - self.oft_multiplier) * I + block_R_weighted = multiplier * block_R + (1 - multiplier) * I R = torch.block_diag(*block_R_weighted) return R def calc_updown(self, orig_weight): - oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - block_Q = oft_blocks - oft_blocks.transpose(1, 2) - norm_Q = torch.norm(block_Q.flatten()) - new_norm_Q = torch.clamp(norm_Q, max=self.constraint) - block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) - block_R = torch.matmul(I + block_Q, (I - block_Q).inverse()) - - block_R_weighted = self.oft_multiplier * block_R + (1 - self.oft_multiplier) * I - R = torch.block_diag(*block_R_weighted) - #R = self.get_weight().to(orig_weight.device, dtype=orig_weight.dtype) - # W = R*W_0 - updown = orig_weight + R - output_shape = [R.size(0), orig_weight.size(1)] + R = self.R + if orig_weight.dim() == 4: + weight = torch.einsum("oihw, op -> pihw", orig_weight, R) + else: + weight = torch.einsum("oi, op -> pi", orig_weight, R) + updown = orig_weight @ R + output_shape = [orig_weight.size(0), R.size(1)] + #output_shape = [R.size(0), orig_weight.size(1)] return self.finalize_updown(updown, orig_weight, output_shape) - # def forward(self, x, y=None): - # x = self.org_forward(x) - # if self.oft_multiplier == 0.0: - # return x - - # R = 
self.get_weight().to(x.device, dtype=x.dtype) - # if x.dim() == 4: - # x = x.permute(0, 2, 3, 1) - # x = torch.matmul(x, R) - # x = x.permute(0, 3, 1, 2) - # else: - # x = torch.matmul(x, R) - # return x + def forward(self, x, y=None): + x = self.org_forward(x) + if self.multiplier() == 0.0: + return x + R = self.get_weight().to(x.device, dtype=x.dtype) + if x.dim() == 4: + x = x.permute(0, 2, 3, 1) + x = torch.matmul(x, R) + x = x.permute(0, 3, 1, 2) + else: + x = torch.matmul(x, R) + return x diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index bd1f1b75..e5e73450 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -169,6 +169,10 @@ def load_network(name, network_on_disk): else: emb_dict[vec_name] = weight bundle_embeddings[emb_name] = emb_dict + + #if key_network_without_network_parts == "oft_unet": + # print(key_network_without_network_parts) + # pass key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2) sd_module = shared.sd_model.network_layer_mapping.get(key, None) @@ -185,15 +189,39 @@ def load_network(name, network_on_disk): elif sd_module is None and "lora_te1_text_model" in key_network_without_network_parts: key = key_network_without_network_parts.replace("lora_te1_text_model", "0_transformer_text_model") sd_module = shared.sd_model.network_layer_mapping.get(key, None) - elif sd_module is None and "oft_unet" in key_network_without_network_parts: - key = key_network_without_network_parts.replace("oft_unet", "diffusion_model") - sd_module = shared.sd_model.network_layer_mapping.get(key, None) # some SD1 Loras also have correct compvis keys if sd_module is None: key = key_network_without_network_parts.replace("lora_te1_text_model", "transformer_text_model") sd_module = shared.sd_model.network_layer_mapping.get(key, None) + elif sd_module is None and "oft_unet" in key_network_without_network_parts: + # UNET_TARGET_REPLACE_MODULE_ALL_LINEAR = ["Transformer2DModel"] + # UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"] + UNET_TARGET_REPLACE_MODULE_ATTN_ONLY = ["CrossAttention"] + # TODO: Change matchedm odules based on whether all linear, conv, etc + + key = key_network_without_network_parts.replace("oft_unet", "diffusion_model") + sd_module = shared.sd_model.network_layer_mapping.get(key, None) + #key_no_suffix = key.rsplit("_to_", 1)[0] + ## Match all modules of class CrossAttention + #replace_module_list = [] + #for module_type in UNET_TARGET_REPLACE_MODULE_ATTN_ONLY: + # replace_module_list += [module for k, module in shared.sd_model.network_layer_mapping.items() if module_type in module.__class__.__name__] + + #matched_module = replace_module_list.get(key_no_suffix, None) + #if key.endswith('to_q'): + # sd_module = matched_module.to_q or None + #if key.endswith('to_k'): + # sd_module = matched_module.to_k or None + #if key.endswith('to_v'): + # sd_module = matched_module.to_v or None + #if key.endswith('to_out_0'): + # sd_module = matched_module.to_out[0] or None + #if key.endswith('to_out_1'): + # sd_module = matched_module.to_out[1] or None + + if sd_module is None: keys_failed_to_match[key_network] = key continue @@ -214,6 +242,14 @@ def load_network(name, network_on_disk): raise AssertionError(f"Could not find a module type (out of {', '.join([x.__class__.__name__ for x in module_types])}) that would accept those keys: {', '.join(weights.w)}") net.modules[key] = net_module + + # replaces forward method of original Linear + # 
applied_to_count = 0 + #for key, created_module in net.modules.items(): + # if isinstance(created_module, network_oft.NetworkModuleOFT): + # net_module.apply_to() + #applied_to_count += 1 + # print(f'Applied OFT modules: {applied_to_count}') embeddings = {} for emb_name, data in bundle_embeddings.items(): -- cgit v1.2.3 From 853e21d98eada4db9a9fd1ae8eda90cf763e2818 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Wed, 18 Oct 2023 04:27:44 -0700 Subject: faster by using cached R in forward --- extensions-builtin/Lora/network_oft.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index f085eca5..68efb1db 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -57,21 +57,32 @@ class NetworkModuleOFT(network.NetworkModule): return R def calc_updown(self, orig_weight): + # this works R = self.R + + # this causes major deepfrying i.e. just doesn't work + # R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) + if orig_weight.dim() == 4: weight = torch.einsum("oihw, op -> pihw", orig_weight, R) else: weight = torch.einsum("oi, op -> pi", orig_weight, R) + updown = orig_weight @ R - output_shape = [orig_weight.size(0), R.size(1)] - #output_shape = [R.size(0), orig_weight.size(1)] + output_shape = self.oft_blocks.shape + + ## this works + # updown = orig_weight @ R + # output_shape = [orig_weight.size(0), R.size(1)] + return self.finalize_updown(updown, orig_weight, output_shape) def forward(self, x, y=None): x = self.org_forward(x) if self.multiplier() == 0.0: return x - R = self.get_weight().to(x.device, dtype=x.dtype) + #R = self.get_weight().to(x.device, dtype=x.dtype) + R = self.R.to(x.device, dtype=x.dtype) if x.dim() == 4: x = x.permute(0, 2, 3, 1) x = torch.matmul(x, R) -- cgit v1.2.3 From eb01d7f0e0fb46285985803296a25715165fb3f9 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Wed, 18 Oct 2023 04:56:53 -0700 Subject: faster by calculating R in updown and using cached R in forward --- extensions-builtin/Lora/network_oft.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 68efb1db..fd5b0c0f 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -58,17 +58,18 @@ class NetworkModuleOFT(network.NetworkModule): def calc_updown(self, orig_weight): # this works - R = self.R + # R = self.R + self.R = self.get_weight(self.multiplier()) - # this causes major deepfrying i.e. just doesn't work + # sending R to device causes major deepfrying i.e. 
just doesn't work # R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) - if orig_weight.dim() == 4: - weight = torch.einsum("oihw, op -> pihw", orig_weight, R) - else: - weight = torch.einsum("oi, op -> pi", orig_weight, R) + # if orig_weight.dim() == 4: + # weight = torch.einsum("oihw, op -> pihw", orig_weight, R) + # else: + # weight = torch.einsum("oi, op -> pi", orig_weight, R) - updown = orig_weight @ R + updown = orig_weight @ self.R output_shape = self.oft_blocks.shape ## this works -- cgit v1.2.3 From 7c128bbdac0da1767c239174e91af6f327845372 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Thu, 19 Oct 2023 13:56:17 +0800 Subject: Add fp8 for sd unet --- extensions-builtin/Lora/network.py | 2 +- extensions-builtin/Lora/network_full.py | 4 ++-- extensions-builtin/Lora/network_glora.py | 10 +++++----- extensions-builtin/Lora/network_hada.py | 12 ++++++------ extensions-builtin/Lora/network_ia3.py | 2 +- extensions-builtin/Lora/network_lokr.py | 18 +++++++++--------- extensions-builtin/Lora/network_lora.py | 6 +++--- extensions-builtin/Lora/network_norm.py | 4 ++-- extensions-builtin/Lora/networks.py | 6 +++--- 9 files changed, 32 insertions(+), 32 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network.py b/extensions-builtin/Lora/network.py index 6021fd8d..a62e5eff 100644 --- a/extensions-builtin/Lora/network.py +++ b/extensions-builtin/Lora/network.py @@ -137,7 +137,7 @@ class NetworkModule: def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): if self.bias is not None: updown = updown.reshape(self.bias.shape) - updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype) + updown += self.bias.to(orig_weight.device, dtype=updown.dtype) updown = updown.reshape(output_shape) if len(output_shape) == 4: diff --git a/extensions-builtin/Lora/network_full.py b/extensions-builtin/Lora/network_full.py index bf6930e9..f221c95f 100644 --- a/extensions-builtin/Lora/network_full.py +++ b/extensions-builtin/Lora/network_full.py @@ -18,9 +18,9 @@ class NetworkModuleFull(network.NetworkModule): def calc_updown(self, orig_weight): output_shape = self.weight.shape - updown = self.weight.to(orig_weight.device, dtype=orig_weight.dtype) + updown = self.weight.to(orig_weight.device) if self.ex_bias is not None: - ex_bias = self.ex_bias.to(orig_weight.device, dtype=orig_weight.dtype) + ex_bias = self.ex_bias.to(orig_weight.device) else: ex_bias = None diff --git a/extensions-builtin/Lora/network_glora.py b/extensions-builtin/Lora/network_glora.py index 492d4870..efe5c681 100644 --- a/extensions-builtin/Lora/network_glora.py +++ b/extensions-builtin/Lora/network_glora.py @@ -22,12 +22,12 @@ class NetworkModuleGLora(network.NetworkModule): self.w2b = weights.w["b2.weight"] def calc_updown(self, orig_weight): - w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype) - w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype) - w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype) - w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype) + w1a = self.w1a.to(orig_weight.device) + w1b = self.w1b.to(orig_weight.device) + w2a = self.w2a.to(orig_weight.device) + w2b = self.w2b.to(orig_weight.device) output_shape = [w1a.size(0), w1b.size(1)] - updown = ((w2b @ w1b) + ((orig_weight @ w2a) @ w1a)) + updown = ((w2b @ w1b) + ((orig_weight.to(dtype = w1a.dtype) @ w2a) @ w1a)) return self.finalize_updown(updown, orig_weight, output_shape) diff --git 
a/extensions-builtin/Lora/network_hada.py b/extensions-builtin/Lora/network_hada.py index 5fcb0695..d95a0fd1 100644 --- a/extensions-builtin/Lora/network_hada.py +++ b/extensions-builtin/Lora/network_hada.py @@ -27,16 +27,16 @@ class NetworkModuleHada(network.NetworkModule): self.t2 = weights.w.get("hada_t2") def calc_updown(self, orig_weight): - w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype) - w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype) - w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype) - w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype) + w1a = self.w1a.to(orig_weight.device) + w1b = self.w1b.to(orig_weight.device) + w2a = self.w2a.to(orig_weight.device) + w2b = self.w2b.to(orig_weight.device) output_shape = [w1a.size(0), w1b.size(1)] if self.t1 is not None: output_shape = [w1a.size(1), w1b.size(1)] - t1 = self.t1.to(orig_weight.device, dtype=orig_weight.dtype) + t1 = self.t1.to(orig_weight.device) updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b) output_shape += t1.shape[2:] else: @@ -45,7 +45,7 @@ class NetworkModuleHada(network.NetworkModule): updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape) if self.t2 is not None: - t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype) + t2 = self.t2.to(orig_weight.device) updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b) else: updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape) diff --git a/extensions-builtin/Lora/network_ia3.py b/extensions-builtin/Lora/network_ia3.py index 7edc4249..96faeaf3 100644 --- a/extensions-builtin/Lora/network_ia3.py +++ b/extensions-builtin/Lora/network_ia3.py @@ -17,7 +17,7 @@ class NetworkModuleIa3(network.NetworkModule): self.on_input = weights.w["on_input"].item() def calc_updown(self, orig_weight): - w = self.w.to(orig_weight.device, dtype=orig_weight.dtype) + w = self.w.to(orig_weight.device) output_shape = [w.size(0), orig_weight.size(1)] if self.on_input: diff --git a/extensions-builtin/Lora/network_lokr.py b/extensions-builtin/Lora/network_lokr.py index 340acdab..fcdaeafd 100644 --- a/extensions-builtin/Lora/network_lokr.py +++ b/extensions-builtin/Lora/network_lokr.py @@ -37,22 +37,22 @@ class NetworkModuleLokr(network.NetworkModule): def calc_updown(self, orig_weight): if self.w1 is not None: - w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype) + w1 = self.w1.to(orig_weight.device) else: - w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype) - w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype) + w1a = self.w1a.to(orig_weight.device) + w1b = self.w1b.to(orig_weight.device) w1 = w1a @ w1b if self.w2 is not None: - w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype) + w2 = self.w2.to(orig_weight.device) elif self.t2 is None: - w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype) - w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype) + w2a = self.w2a.to(orig_weight.device) + w2b = self.w2b.to(orig_weight.device) w2 = w2a @ w2b else: - t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype) - w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype) - w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype) + t2 = self.t2.to(orig_weight.device) + w2a = self.w2a.to(orig_weight.device) + w2b = self.w2b.to(orig_weight.device) w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b) output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)] diff --git a/extensions-builtin/Lora/network_lora.py b/extensions-builtin/Lora/network_lora.py index 
26c0a72c..4cc40295 100644 --- a/extensions-builtin/Lora/network_lora.py +++ b/extensions-builtin/Lora/network_lora.py @@ -61,13 +61,13 @@ class NetworkModuleLora(network.NetworkModule): return module def calc_updown(self, orig_weight): - up = self.up_model.weight.to(orig_weight.device, dtype=orig_weight.dtype) - down = self.down_model.weight.to(orig_weight.device, dtype=orig_weight.dtype) + up = self.up_model.weight.to(orig_weight.device) + down = self.down_model.weight.to(orig_weight.device) output_shape = [up.size(0), down.size(1)] if self.mid_model is not None: # cp-decomposition - mid = self.mid_model.weight.to(orig_weight.device, dtype=orig_weight.dtype) + mid = self.mid_model.weight.to(orig_weight.device) updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid) output_shape += mid.shape[2:] else: diff --git a/extensions-builtin/Lora/network_norm.py b/extensions-builtin/Lora/network_norm.py index ce450158..d25afcbb 100644 --- a/extensions-builtin/Lora/network_norm.py +++ b/extensions-builtin/Lora/network_norm.py @@ -18,10 +18,10 @@ class NetworkModuleNorm(network.NetworkModule): def calc_updown(self, orig_weight): output_shape = self.w_norm.shape - updown = self.w_norm.to(orig_weight.device, dtype=orig_weight.dtype) + updown = self.w_norm.to(orig_weight.device) if self.b_norm is not None: - ex_bias = self.b_norm.to(orig_weight.device, dtype=orig_weight.dtype) + ex_bias = self.b_norm.to(orig_weight.device) else: ex_bias = None diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 60d8dec4..8ea4ea60 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -381,12 +381,12 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn # inpainting model. 
zero pad updown to make channel[1] 4 to 9 updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) - self.weight += updown + self.weight.copy_((self.weight.to(dtype=updown.dtype) + updown).to(dtype=self.weight.dtype)) if ex_bias is not None and hasattr(self, 'bias'): if self.bias is None: - self.bias = torch.nn.Parameter(ex_bias) + self.bias = torch.nn.Parameter(ex_bias).to(self.weight.dtype) else: - self.bias += ex_bias + self.bias.copy_((self.bias.to(dtype=ex_bias.dtype) + ex_bias).to(dtype=self.bias.dtype)) except RuntimeError as e: logging.debug(f"Network {net.name} layer {network_layer_name}: {e}") extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1 -- cgit v1.2.3 From 321680ccd0e0404223fbdf4f26498f7d0317fb75 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:41:17 -0700 Subject: refactor: fix constraint, re-use get_weight --- extensions-builtin/Lora/network_oft.py | 40 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index fd5b0c0f..2af1bc4c 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -9,7 +9,7 @@ class ModuleTypeOFT(network.ModuleType): return None -# adapted from https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py +# adapted from kohya's implementation https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py class NetworkModuleOFT(network.NetworkModule): def __init__(self, net: network.Network, weights: network.NetworkWeights): @@ -17,7 +17,6 @@ class NetworkModuleOFT(network.NetworkModule): self.oft_blocks = weights.w["oft_blocks"] self.alpha = weights.w["alpha"] - self.dim = self.oft_blocks.shape[0] self.num_blocks = self.dim @@ -26,64 +25,57 @@ class NetworkModuleOFT(network.NetworkModule): elif "Conv" in self.sd_module.__class__.__name__: self.out_dim = self.sd_module.out_channels - self.constraint = self.alpha - #self.constraint = self.alpha * self.out_dim + self.constraint = self.alpha * self.out_dim self.block_size = self.out_dim // self.num_blocks self.org_module: list[torch.Module] = [self.sd_module] - - self.R = self.get_weight() - + self.R = self.get_weight(self.oft_blocks) self.apply_to() # replace forward method of original linear rather than replacing the module + # how do we revert this to unload the weights? 
def apply_to(self): self.org_forward = self.org_module[0].forward self.org_module[0].forward = self.forward - def get_weight(self, multiplier=None): - if not multiplier: - multiplier = self.multiplier() - block_Q = self.oft_blocks - self.oft_blocks.transpose(1, 2) + def get_weight(self, oft_blocks, multiplier=None): + block_Q = oft_blocks - oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) new_norm_Q = torch.clamp(norm_Q, max=self.constraint) block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) I = torch.eye(self.block_size, device=self.oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) block_R = torch.matmul(I + block_Q, (I - block_Q).inverse()) - - block_R_weighted = multiplier * block_R + (1 - multiplier) * I - R = torch.block_diag(*block_R_weighted) + #block_R_weighted = multiplier * block_R + (1 - multiplier) * I + #R = torch.block_diag(*block_R_weighted) + R = torch.block_diag(*block_R) return R def calc_updown(self, orig_weight): - # this works - # R = self.R - self.R = self.get_weight(self.multiplier()) + oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - # sending R to device causes major deepfrying i.e. just doesn't work - # R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) + R = self.get_weight(oft_blocks) + self.R = R # if orig_weight.dim() == 4: # weight = torch.einsum("oihw, op -> pihw", orig_weight, R) # else: # weight = torch.einsum("oi, op -> pi", orig_weight, R) - updown = orig_weight @ self.R + updown = orig_weight @ R output_shape = self.oft_blocks.shape - ## this works - # updown = orig_weight @ R - # output_shape = [orig_weight.size(0), R.size(1)] - return self.finalize_updown(updown, orig_weight, output_shape) def forward(self, x, y=None): x = self.org_forward(x) if self.multiplier() == 0.0: return x + + # calculating R here is excruciatingly slow #R = self.get_weight().to(x.device, dtype=x.dtype) R = self.R.to(x.device, dtype=x.dtype) + if x.dim() == 4: x = x.permute(0, 2, 3, 1) x = torch.matmul(x, R) -- cgit v1.2.3 From d10c4db57ed08234a7aed5f530f269ff78544ab0 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:52:14 -0700 Subject: style: formatting --- extensions-builtin/Lora/network_oft.py | 4 ++-- extensions-builtin/Lora/networks.py | 35 ---------------------------------- 2 files changed, 2 insertions(+), 37 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 2af1bc4c..0a87958e 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -37,7 +37,7 @@ class NetworkModuleOFT(network.NetworkModule): def apply_to(self): self.org_forward = self.org_module[0].forward self.org_module[0].forward = self.forward - + def get_weight(self, oft_blocks, multiplier=None): block_Q = oft_blocks - oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) @@ -66,7 +66,7 @@ class NetworkModuleOFT(network.NetworkModule): output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) - + def forward(self, x, y=None): x = self.org_forward(x) if self.multiplier() == 0.0: diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index e5e73450..78a97033 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -169,10 +169,6 @@ def load_network(name, network_on_disk): else: emb_dict[vec_name] = weight 
bundle_embeddings[emb_name] = emb_dict - - #if key_network_without_network_parts == "oft_unet": - # print(key_network_without_network_parts) - # pass key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2) sd_module = shared.sd_model.network_layer_mapping.get(key, None) @@ -196,31 +192,8 @@ def load_network(name, network_on_disk): sd_module = shared.sd_model.network_layer_mapping.get(key, None) elif sd_module is None and "oft_unet" in key_network_without_network_parts: - # UNET_TARGET_REPLACE_MODULE_ALL_LINEAR = ["Transformer2DModel"] - # UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"] - UNET_TARGET_REPLACE_MODULE_ATTN_ONLY = ["CrossAttention"] - # TODO: Change matchedm odules based on whether all linear, conv, etc - key = key_network_without_network_parts.replace("oft_unet", "diffusion_model") sd_module = shared.sd_model.network_layer_mapping.get(key, None) - #key_no_suffix = key.rsplit("_to_", 1)[0] - ## Match all modules of class CrossAttention - #replace_module_list = [] - #for module_type in UNET_TARGET_REPLACE_MODULE_ATTN_ONLY: - # replace_module_list += [module for k, module in shared.sd_model.network_layer_mapping.items() if module_type in module.__class__.__name__] - - #matched_module = replace_module_list.get(key_no_suffix, None) - #if key.endswith('to_q'): - # sd_module = matched_module.to_q or None - #if key.endswith('to_k'): - # sd_module = matched_module.to_k or None - #if key.endswith('to_v'): - # sd_module = matched_module.to_v or None - #if key.endswith('to_out_0'): - # sd_module = matched_module.to_out[0] or None - #if key.endswith('to_out_1'): - # sd_module = matched_module.to_out[1] or None - if sd_module is None: keys_failed_to_match[key_network] = key @@ -242,14 +215,6 @@ def load_network(name, network_on_disk): raise AssertionError(f"Could not find a module type (out of {', '.join([x.__class__.__name__ for x in module_types])}) that would accept those keys: {', '.join(weights.w)}") net.modules[key] = net_module - - # replaces forward method of original Linear - # applied_to_count = 0 - #for key, created_module in net.modules.items(): - # if isinstance(created_module, network_oft.NetworkModuleOFT): - # net_module.apply_to() - #applied_to_count += 1 - # print(f'Applied OFT modules: {applied_to_count}') embeddings = {} for emb_name, data in bundle_embeddings.items(): -- cgit v1.2.3 From 0550659ce6e1c37d1ab05cb8a2cb31d499fa552f Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Thu, 19 Oct 2023 13:13:02 -0700 Subject: style: fix ambiguous variable name --- extensions-builtin/Lora/network_oft.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 0a87958e..4e8382c1 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -43,8 +43,8 @@ class NetworkModuleOFT(network.NetworkModule): norm_Q = torch.norm(block_Q.flatten()) new_norm_Q = torch.clamp(norm_Q, max=self.constraint) block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - I = torch.eye(self.block_size, device=self.oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) - block_R = torch.matmul(I + block_Q, (I - block_Q).inverse()) + m_I = torch.eye(self.block_size, device=self.oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) #block_R_weighted = 
multiplier * block_R + (1 - multiplier) * I #R = torch.block_diag(*block_R_weighted) R = torch.block_diag(*block_R) -- cgit v1.2.3 From 2d8c894b274d60a3e3563a2ace23c4ebcea9e652 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 21 Oct 2023 13:43:31 -0700 Subject: refactor: use forward hook instead of custom forward --- extensions-builtin/Lora/network_oft.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 4e8382c1..8e561ab0 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -36,9 +36,11 @@ class NetworkModuleOFT(network.NetworkModule): # how do we revert this to unload the weights? def apply_to(self): self.org_forward = self.org_module[0].forward - self.org_module[0].forward = self.forward + #self.org_module[0].forward = self.forward + self.org_module[0].register_forward_hook(self.forward_hook) def get_weight(self, oft_blocks, multiplier=None): + self.constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) block_Q = oft_blocks - oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) new_norm_Q = torch.clamp(norm_Q, max=self.constraint) @@ -66,14 +68,10 @@ class NetworkModuleOFT(network.NetworkModule): output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) - - def forward(self, x, y=None): - x = self.org_forward(x) - if self.multiplier() == 0.0: - return x - - # calculating R here is excruciatingly slow - #R = self.get_weight().to(x.device, dtype=x.dtype) + + def forward_hook(self, module, args, output): + #print(f'Forward hook in {self.network_key} called') + x = output R = self.R.to(x.device, dtype=x.dtype) if x.dim() == 4: @@ -83,3 +81,20 @@ class NetworkModuleOFT(network.NetworkModule): else: x = torch.matmul(x, R) return x + + # def forward(self, x, y=None): + # x = self.org_forward(x) + # if self.multiplier() == 0.0: + # return x + + # # calculating R here is excruciatingly slow + # #R = self.get_weight().to(x.device, dtype=x.dtype) + # R = self.R.to(x.device, dtype=x.dtype) + + # if x.dim() == 4: + # x = x.permute(0, 2, 3, 1) + # x = torch.matmul(x, R) + # x = x.permute(0, 3, 1, 2) + # else: + # x = torch.matmul(x, R) + # return x -- cgit v1.2.3 From 768354772853a1d27a9bf7e41bd6a6e4eac7a9c7 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 21 Oct 2023 14:42:24 -0700 Subject: fix: return orig weights during updown, merge weights before forward --- extensions-builtin/Lora/network_oft.py | 90 ++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 21 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 8e561ab0..f5f32c23 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -1,5 +1,6 @@ import torch import network +from modules import devices class ModuleTypeOFT(network.ModuleType): @@ -29,23 +30,56 @@ class NetworkModuleOFT(network.NetworkModule): self.block_size = self.out_dim // self.num_blocks self.org_module: list[torch.Module] = [self.sd_module] + self.org_weight = self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) + #self.org_weight = self.org_module[0].weight.to(devices.cpu, copy=True) self.R = self.get_weight(self.oft_blocks) + + 
self.merged_weight = self.merge_weight() self.apply_to() + self.merged = False + + + def merge_weight(self): + org_sd = self.org_module[0].state_dict() + R = self.R.to(self.org_weight.device, dtype=self.org_weight.dtype) + if self.org_weight.dim() == 4: + weight = torch.einsum("oihw, op -> pihw", self.org_weight, R) + else: + weight = torch.einsum("oi, op -> pi", self.org_weight, R) + org_sd['weight'] = weight + # replace weight + #self.org_module[0].load_state_dict(org_sd) + return weight + pass + + def replace_weight(self, new_weight): + org_sd = self.org_module[0].state_dict() + org_sd['weight'] = new_weight + self.org_module[0].load_state_dict(org_sd) + self.merged = True + + def restore_weight(self): + org_sd = self.org_module[0].state_dict() + org_sd['weight'] = self.org_weight + self.org_module[0].load_state_dict(org_sd) + self.merged = False + # replace forward method of original linear rather than replacing the module # how do we revert this to unload the weights? def apply_to(self): self.org_forward = self.org_module[0].forward #self.org_module[0].forward = self.forward + self.org_module[0].register_forward_pre_hook(self.pre_forward_hook) self.org_module[0].register_forward_hook(self.forward_hook) def get_weight(self, oft_blocks, multiplier=None): - self.constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) + constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) block_Q = oft_blocks - oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) - new_norm_Q = torch.clamp(norm_Q, max=self.constraint) + new_norm_Q = torch.clamp(norm_Q, max=constraint) block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - m_I = torch.eye(self.block_size, device=self.oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) #block_R_weighted = multiplier * block_R + (1 - multiplier) * I #R = torch.block_diag(*block_R_weighted) @@ -54,33 +88,47 @@ class NetworkModuleOFT(network.NetworkModule): return R def calc_updown(self, orig_weight): - oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + #oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - R = self.get_weight(oft_blocks) - self.R = R + #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) + ##self.R = R - # if orig_weight.dim() == 4: - # weight = torch.einsum("oihw, op -> pihw", orig_weight, R) - # else: - # weight = torch.einsum("oi, op -> pi", orig_weight, R) + #if orig_weight.dim() == 4: + # weight = torch.einsum("oihw, op -> pihw", orig_weight, R) + #else: + # weight = torch.einsum("oi, op -> pi", orig_weight, R) - updown = orig_weight @ R - output_shape = self.oft_blocks.shape + #updown = orig_weight @ R + #updown = weight + updown = torch.zeros_like(orig_weight, device=orig_weight.device, dtype=orig_weight.dtype) + #updown = orig_weight + output_shape = orig_weight.shape + #orig_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) + #output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) + def pre_forward_hook(self, module, input): + if not self.merged: + self.replace_weight(self.merged_weight) + + def forward_hook(self, module, args, output): + if self.merged: + pass + #self.restore_weight() #print(f'Forward hook in {self.network_key} called') - x = output - R = self.R.to(x.device, 
dtype=x.dtype) - if x.dim() == 4: - x = x.permute(0, 2, 3, 1) - x = torch.matmul(x, R) - x = x.permute(0, 3, 1, 2) - else: - x = torch.matmul(x, R) - return x + #x = output + #R = self.R.to(x.device, dtype=x.dtype) + + #if x.dim() == 4: + # x = x.permute(0, 2, 3, 1) + # x = torch.matmul(x, R) + # x = x.permute(0, 3, 1, 2) + #else: + # x = torch.matmul(x, R) + #return x # def forward(self, x, y=None): # x = self.org_forward(x) -- cgit v1.2.3 From fce86ab7d75690785f0f5b496f1b3aee922c0ae3 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 21 Oct 2023 16:03:54 -0700 Subject: fix: support multiplier, no forward pass hook --- extensions-builtin/Lora/network_oft.py | 43 ++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 10 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index f5f32c23..e0672ba6 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -32,21 +32,27 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module: list[torch.Module] = [self.sd_module] self.org_weight = self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) #self.org_weight = self.org_module[0].weight.to(devices.cpu, copy=True) - self.R = self.get_weight(self.oft_blocks) + init_multiplier = self.multiplier() * self.calc_scale() + self.last_multiplier = init_multiplier + self.R = self.get_weight(self.oft_blocks, init_multiplier) self.merged_weight = self.merge_weight() self.apply_to() self.merged = False + # weights_backup = getattr(self.org_module[0], 'network_weights_backup', None) + # if weights_backup is None: + # self.org_module[0].network_weights_backup = self.org_weight + def merge_weight(self): - org_sd = self.org_module[0].state_dict() + #org_sd = self.org_module[0].state_dict() R = self.R.to(self.org_weight.device, dtype=self.org_weight.dtype) if self.org_weight.dim() == 4: weight = torch.einsum("oihw, op -> pihw", self.org_weight, R) else: weight = torch.einsum("oi, op -> pi", self.org_weight, R) - org_sd['weight'] = weight + #org_sd['weight'] = weight # replace weight #self.org_module[0].load_state_dict(org_sd) return weight @@ -74,6 +80,7 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module[0].register_forward_hook(self.forward_hook) def get_weight(self, oft_blocks, multiplier=None): + multiplier = multiplier.to(oft_blocks.device, dtype=oft_blocks.dtype) constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) block_Q = oft_blocks - oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) @@ -81,9 +88,9 @@ class NetworkModuleOFT(network.NetworkModule): block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) - #block_R_weighted = multiplier * block_R + (1 - multiplier) * I - #R = torch.block_diag(*block_R_weighted) - R = torch.block_diag(*block_R) + block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I + R = torch.block_diag(*block_R_weighted) + #R = torch.block_diag(*block_R) return R @@ -93,6 +100,8 @@ class NetworkModuleOFT(network.NetworkModule): #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) ##self.R = R + #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) + ##self.R = R #if orig_weight.dim() == 4: # weight = torch.einsum("oihw, op -> pihw", 
orig_weight, R) #else: @@ -103,19 +112,33 @@ class NetworkModuleOFT(network.NetworkModule): updown = torch.zeros_like(orig_weight, device=orig_weight.device, dtype=orig_weight.dtype) #updown = orig_weight output_shape = orig_weight.shape - #orig_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) + orig_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) #output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) def pre_forward_hook(self, module, input): - if not self.merged: + multiplier = self.multiplier() * self.calc_scale() + if not multiplier==self.last_multiplier or not self.merged: + + #if multiplier != self.last_multiplier or not self.merged: + self.R = self.get_weight(self.oft_blocks, multiplier) + self.last_multiplier = multiplier + self.merged_weight = self.merge_weight() self.replace_weight(self.merged_weight) + #elif not self.merged: + # self.replace_weight(self.merged_weight) def forward_hook(self, module, args, output): - if self.merged: - pass + pass + #output = output * self.multiplier() * self.calc_scale() + #if len(args) > 0: + # y = args[0] + # output = output + y + #return output + #if self.merged: + # pass #self.restore_weight() #print(f'Forward hook in {self.network_key} called') -- cgit v1.2.3 From 76f5abdbdb739133eff2ccefa36eac62bea3fa08 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 21 Oct 2023 16:07:45 -0700 Subject: style: cleanup oft --- extensions-builtin/Lora/network_oft.py | 82 +++------------------------------- 1 file changed, 7 insertions(+), 75 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e0672ba6..e462ccb1 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -1,6 +1,5 @@ import torch import network -from modules import devices class ModuleTypeOFT(network.ModuleType): @@ -31,33 +30,24 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module: list[torch.Module] = [self.sd_module] self.org_weight = self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) - #self.org_weight = self.org_module[0].weight.to(devices.cpu, copy=True) + init_multiplier = self.multiplier() * self.calc_scale() self.last_multiplier = init_multiplier + self.R = self.get_weight(self.oft_blocks, init_multiplier) self.merged_weight = self.merge_weight() self.apply_to() self.merged = False - # weights_backup = getattr(self.org_module[0], 'network_weights_backup', None) - # if weights_backup is None: - # self.org_module[0].network_weights_backup = self.org_weight - - def merge_weight(self): - #org_sd = self.org_module[0].state_dict() R = self.R.to(self.org_weight.device, dtype=self.org_weight.dtype) if self.org_weight.dim() == 4: weight = torch.einsum("oihw, op -> pihw", self.org_weight, R) else: weight = torch.einsum("oi, op -> pi", self.org_weight, R) - #org_sd['weight'] = weight - # replace weight - #self.org_module[0].load_state_dict(org_sd) return weight - pass - + def replace_weight(self, new_weight): org_sd = self.org_module[0].state_dict() org_sd['weight'] = new_weight @@ -70,9 +60,7 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module[0].load_state_dict(org_sd) self.merged = False - - # replace forward method of original linear rather than replacing the module - # how do we revert this to unload the weights? 
+ # FIXME: hook forward method of original linear, but how do we undo the hook when we are done? def apply_to(self): self.org_forward = self.org_module[0].forward #self.org_module[0].forward = self.forward @@ -90,82 +78,26 @@ class NetworkModuleOFT(network.NetworkModule): block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I R = torch.block_diag(*block_R_weighted) - #R = torch.block_diag(*block_R) return R def calc_updown(self, orig_weight): - #oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - - #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) - ##self.R = R - - #R = self.R.to(orig_weight.device, dtype=orig_weight.dtype) - ##self.R = R - #if orig_weight.dim() == 4: - # weight = torch.einsum("oihw, op -> pihw", orig_weight, R) - #else: - # weight = torch.einsum("oi, op -> pi", orig_weight, R) - - #updown = orig_weight @ R - #updown = weight updown = torch.zeros_like(orig_weight, device=orig_weight.device, dtype=orig_weight.dtype) - #updown = orig_weight output_shape = orig_weight.shape orig_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) #output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) - + def pre_forward_hook(self, module, input): multiplier = self.multiplier() * self.calc_scale() - if not multiplier==self.last_multiplier or not self.merged: - #if multiplier != self.last_multiplier or not self.merged: + if not multiplier==self.last_multiplier or not self.merged: self.R = self.get_weight(self.oft_blocks, multiplier) self.last_multiplier = multiplier self.merged_weight = self.merge_weight() self.replace_weight(self.merged_weight) - #elif not self.merged: - # self.replace_weight(self.merged_weight) - + def forward_hook(self, module, args, output): pass - #output = output * self.multiplier() * self.calc_scale() - #if len(args) > 0: - # y = args[0] - # output = output + y - #return output - #if self.merged: - # pass - #self.restore_weight() - #print(f'Forward hook in {self.network_key} called') - - #x = output - #R = self.R.to(x.device, dtype=x.dtype) - - #if x.dim() == 4: - # x = x.permute(0, 2, 3, 1) - # x = torch.matmul(x, R) - # x = x.permute(0, 3, 1, 2) - #else: - # x = torch.matmul(x, R) - #return x - - # def forward(self, x, y=None): - # x = self.org_forward(x) - # if self.multiplier() == 0.0: - # return x - - # # calculating R here is excruciatingly slow - # #R = self.get_weight().to(x.device, dtype=x.dtype) - # R = self.R.to(x.device, dtype=x.dtype) - - # if x.dim() == 4: - # x = x.permute(0, 2, 3, 1) - # x = torch.matmul(x, R) - # x = x.permute(0, 3, 1, 2) - # else: - # x = torch.matmul(x, R) - # return x -- cgit v1.2.3 From de8ee92ed88b855098e273f576a27f4789f0693d Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 21 Oct 2023 17:37:17 -0700 Subject: fix: use merge_weight to cache value --- extensions-builtin/Lora/network_oft.py | 57 ++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 17 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e462ccb1..ebe6740c 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -29,23 +29,27 @@ class NetworkModuleOFT(network.NetworkModule): self.block_size = self.out_dim // self.num_blocks self.org_module: list[torch.Module] = [self.sd_module] - self.org_weight = 
self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) + #self.org_weight = self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) init_multiplier = self.multiplier() * self.calc_scale() self.last_multiplier = init_multiplier self.R = self.get_weight(self.oft_blocks, init_multiplier) + self.hooks = [] self.merged_weight = self.merge_weight() - self.apply_to() + + #self.apply_to() + self.applied = False self.merged = False def merge_weight(self): - R = self.R.to(self.org_weight.device, dtype=self.org_weight.dtype) - if self.org_weight.dim() == 4: - weight = torch.einsum("oihw, op -> pihw", self.org_weight, R) + org_weight = self.org_module[0].weight + R = self.R.to(org_weight.device, dtype=org_weight.dtype) + if org_weight.dim() == 4: + weight = torch.einsum("oihw, op -> pihw", org_weight, R) else: - weight = torch.einsum("oi, op -> pi", self.org_weight, R) + weight = torch.einsum("oi, op -> pi", org_weight, R) return weight def replace_weight(self, new_weight): @@ -55,17 +59,29 @@ class NetworkModuleOFT(network.NetworkModule): self.merged = True def restore_weight(self): - org_sd = self.org_module[0].state_dict() - org_sd['weight'] = self.org_weight - self.org_module[0].load_state_dict(org_sd) - self.merged = False + pass + #org_sd = self.org_module[0].state_dict() + #org_sd['weight'] = self.org_weight + #self.org_module[0].load_state_dict(org_sd) + #self.merged = False # FIXME: hook forward method of original linear, but how do we undo the hook when we are done? def apply_to(self): - self.org_forward = self.org_module[0].forward - #self.org_module[0].forward = self.forward - self.org_module[0].register_forward_pre_hook(self.pre_forward_hook) - self.org_module[0].register_forward_hook(self.forward_hook) + if not self.applied: + self.org_forward = self.org_module[0].forward + #self.org_module[0].forward = self.forward + prehook = self.org_module[0].register_forward_pre_hook(self.pre_forward_hook) + hook = self.org_module[0].register_forward_hook(self.forward_hook) + self.hooks.append(prehook) + self.hooks.append(hook) + self.applied = True + + def remove_from(self): + if self.applied: + for hook in self.hooks: + hook.remove() + self.hooks = [] + self.applied = False def get_weight(self, oft_blocks, multiplier=None): multiplier = multiplier.to(oft_blocks.device, dtype=oft_blocks.dtype) @@ -82,14 +98,22 @@ class NetworkModuleOFT(network.NetworkModule): return R def calc_updown(self, orig_weight): + if not self.applied: + self.apply_to() + + self.merged_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) + updown = torch.zeros_like(orig_weight, device=orig_weight.device, dtype=orig_weight.dtype) output_shape = orig_weight.shape - orig_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) + orig_weight = self.merged_weight #output_shape = self.oft_blocks.shape return self.finalize_updown(updown, orig_weight, output_shape) def pre_forward_hook(self, module, input): + #if not self.applied: + # self.apply_to() + multiplier = self.multiplier() * self.calc_scale() if not multiplier==self.last_multiplier or not self.merged: @@ -98,6 +122,5 @@ class NetworkModuleOFT(network.NetworkModule): self.merged_weight = self.merge_weight() self.replace_weight(self.merged_weight) - def forward_hook(self, module, args, output): - pass + pass \ No newline at end of file -- cgit v1.2.3 From 4a50c9638c3eac860fb05ae603cd61aabf4cd1a9 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sun, 22 Oct 2023 
08:54:24 -0700 Subject: refactor: remove used OFT functions --- extensions-builtin/Lora/network_oft.py | 82 +++++----------------------------- 1 file changed, 10 insertions(+), 72 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index ebe6740c..3034a407 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -29,98 +29,36 @@ class NetworkModuleOFT(network.NetworkModule): self.block_size = self.out_dim // self.num_blocks self.org_module: list[torch.Module] = [self.sd_module] - #self.org_weight = self.org_module[0].weight.to(self.org_module[0].weight.device, copy=True) - init_multiplier = self.multiplier() * self.calc_scale() - self.last_multiplier = init_multiplier - - self.R = self.get_weight(self.oft_blocks, init_multiplier) - - self.hooks = [] - self.merged_weight = self.merge_weight() - - #self.apply_to() - self.applied = False - self.merged = False - - def merge_weight(self): - org_weight = self.org_module[0].weight - R = self.R.to(org_weight.device, dtype=org_weight.dtype) + def merge_weight(self, R_weight, org_weight): + R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) if org_weight.dim() == 4: - weight = torch.einsum("oihw, op -> pihw", org_weight, R) + weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) else: - weight = torch.einsum("oi, op -> pi", org_weight, R) + weight = torch.einsum("oi, op -> pi", org_weight, R_weight) return weight - def replace_weight(self, new_weight): - org_sd = self.org_module[0].state_dict() - org_sd['weight'] = new_weight - self.org_module[0].load_state_dict(org_sd) - self.merged = True - - def restore_weight(self): - pass - #org_sd = self.org_module[0].state_dict() - #org_sd['weight'] = self.org_weight - #self.org_module[0].load_state_dict(org_sd) - #self.merged = False - - # FIXME: hook forward method of original linear, but how do we undo the hook when we are done? 
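The question in the FIXME above has a standard answer in PyTorch: register_forward_pre_hook and register_forward_hook both return a torch.utils.hooks.RemovableHandle, and calling handle.remove() detaches the hook again; this is what the remove_from() helper added in the previous patch relies on before the whole hook path is dropped here in favour of calc_updown. A minimal sketch, assuming a plain torch.nn.Linear as a stand-in for the wrapped module (illustration only, not part of any patch):

import torch

# hypothetical stand-in for the original Linear layer being wrapped
layer = torch.nn.Linear(4, 4)

def pre_hook(module, args):
    # runs before module.forward; returning None leaves the input unchanged
    return None

def post_hook(module, args, output):
    # runs after module.forward; returning None leaves the output unchanged
    return None

handles = [
    layer.register_forward_pre_hook(pre_hook),
    layer.register_forward_hook(post_hook),
]

layer(torch.randn(1, 4))   # both hooks fire on this call

for handle in handles:
    handle.remove()         # undoes the hooks; forward behaves as before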
- def apply_to(self): - if not self.applied: - self.org_forward = self.org_module[0].forward - #self.org_module[0].forward = self.forward - prehook = self.org_module[0].register_forward_pre_hook(self.pre_forward_hook) - hook = self.org_module[0].register_forward_hook(self.forward_hook) - self.hooks.append(prehook) - self.hooks.append(hook) - self.applied = True - - def remove_from(self): - if self.applied: - for hook in self.hooks: - hook.remove() - self.hooks = [] - self.applied = False - def get_weight(self, oft_blocks, multiplier=None): - multiplier = multiplier.to(oft_blocks.device, dtype=oft_blocks.dtype) constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) + block_Q = oft_blocks - oft_blocks.transpose(1, 2) norm_Q = torch.norm(block_Q.flatten()) new_norm_Q = torch.clamp(norm_Q, max=constraint) block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) + block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I R = torch.block_diag(*block_R_weighted) return R def calc_updown(self, orig_weight): - if not self.applied: - self.apply_to() - - self.merged_weight = self.merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) + R = self.get_weight(self.oft_blocks, self.multiplier()) + merged_weight = self.merge_weight(R, orig_weight) - updown = torch.zeros_like(orig_weight, device=orig_weight.device, dtype=orig_weight.dtype) + updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape - orig_weight = self.merged_weight - #output_shape = self.oft_blocks.shape + orig_weight = orig_weight return self.finalize_updown(updown, orig_weight, output_shape) - - def pre_forward_hook(self, module, input): - #if not self.applied: - # self.apply_to() - - multiplier = self.multiplier() * self.calc_scale() - - if not multiplier==self.last_multiplier or not self.merged: - self.R = self.get_weight(self.oft_blocks, multiplier) - self.last_multiplier = multiplier - self.merged_weight = self.merge_weight() - self.replace_weight(self.merged_weight) - - def forward_hook(self, module, args, output): - pass \ No newline at end of file -- cgit v1.2.3 From 3b8515d2c9abad7f0ccaac0215803716e861ee0e Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sun, 22 Oct 2023 09:27:48 -0700 Subject: fix: multiplier applied twice in finalize_updown --- extensions-builtin/Lora/network_oft.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 3034a407..efbdd296 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -54,7 +54,8 @@ class NetworkModuleOFT(network.NetworkModule): return R def calc_updown(self, orig_weight): - R = self.get_weight(self.oft_blocks, self.multiplier()) + multiplier = self.multiplier() * self.calc_scale() + R = self.get_weight(self.oft_blocks, multiplier) merged_weight = self.merge_weight(R, orig_weight) updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight @@ -62,3 +63,23 @@ class NetworkModuleOFT(network.NetworkModule): orig_weight = orig_weight return self.finalize_updown(updown, orig_weight, output_shape) + + # override to remove the multiplier/scale factor; it's already multiplied in 
get_weight + def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): + #return super().finalize_updown(updown, orig_weight, output_shape, ex_bias) + + if self.bias is not None: + updown = updown.reshape(self.bias.shape) + updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype) + updown = updown.reshape(output_shape) + + if len(output_shape) == 4: + updown = updown.reshape(output_shape) + + if orig_weight.size().numel() == updown.size().numel(): + updown = updown.reshape(orig_weight.shape) + + if ex_bias is not None: + ex_bias = ex_bias * self.multiplier() + + return updown, ex_bias -- cgit v1.2.3 From 6523edb8a45d4e09f11f3b4e1d133afa6fb65e53 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sun, 22 Oct 2023 09:31:15 -0700 Subject: style: conform style --- extensions-builtin/Lora/network_oft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index efbdd296..e43c9a1d 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -63,7 +63,7 @@ class NetworkModuleOFT(network.NetworkModule): orig_weight = orig_weight return self.finalize_updown(updown, orig_weight, output_shape) - + # override to remove the multiplier/scale factor; it's already multiplied in get_weight def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): #return super().finalize_updown(updown, orig_weight, output_shape, ex_bias) -- cgit v1.2.3 From a2fad6ee055f3f4e98e46b6c2d912776fe608214 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Wed, 1 Nov 2023 22:34:27 -0700 Subject: test implementation based on kohaku diag-oft implementation --- extensions-builtin/Lora/network_oft.py | 59 ++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 21 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e43c9a1d..ff61b369 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -1,5 +1,6 @@ import torch import network +from einops import rearrange class ModuleTypeOFT(network.ModuleType): @@ -30,35 +31,51 @@ class NetworkModuleOFT(network.NetworkModule): self.org_module: list[torch.Module] = [self.sd_module] - def merge_weight(self, R_weight, org_weight): - R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) - if org_weight.dim() == 4: - weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) - else: - weight = torch.einsum("oi, op -> pi", org_weight, R_weight) - return weight + # def merge_weight(self, R_weight, org_weight): + # R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) + # if org_weight.dim() == 4: + # weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) + # else: + # weight = torch.einsum("oi, op -> pi", org_weight, R_weight) + # weight = torch.einsum( + # "k n m, k n ... 
-> k m ...", + # self.oft_diag * scale + torch.eye(self.block_size, device=device), + # org_weight + # ) + # return weight def get_weight(self, oft_blocks, multiplier=None): - constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) + # constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) - block_Q = oft_blocks - oft_blocks.transpose(1, 2) - norm_Q = torch.norm(block_Q.flatten()) - new_norm_Q = torch.clamp(norm_Q, max=constraint) - block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) - block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) + # block_Q = oft_blocks - oft_blocks.transpose(1, 2) + # norm_Q = torch.norm(block_Q.flatten()) + # new_norm_Q = torch.clamp(norm_Q, max=constraint) + # block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) + # m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + # block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) - block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I - R = torch.block_diag(*block_R_weighted) + # block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I + # R = torch.block_diag(*block_R_weighted) + #return R + return self.oft_blocks - return R def calc_updown(self, orig_weight): multiplier = self.multiplier() * self.calc_scale() - R = self.get_weight(self.oft_blocks, multiplier) - merged_weight = self.merge_weight(R, orig_weight) - - updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + #R = self.get_weight(self.oft_blocks, multiplier) + R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + #merged_weight = self.merge_weight(R, orig_weight) + + orig_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) + weight = torch.einsum( + 'k n m, k n ... -> k m ...', + R * multiplier + torch.eye(self.block_size, device=orig_weight.device), + orig_weight + ) + weight = rearrange(weight, 'k m ... 
-> (k m) ...') + + #updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + updown = weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape orig_weight = orig_weight -- cgit v1.2.3 From 65ccd6305fcf72347d5ed68f03095dced865ef6e Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Thu, 2 Nov 2023 00:11:32 -0700 Subject: detect diag_oft type --- extensions-builtin/Lora/networks.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 78a97033..7f814706 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -191,10 +191,17 @@ def load_network(name, network_on_disk): key = key_network_without_network_parts.replace("lora_te1_text_model", "transformer_text_model") sd_module = shared.sd_model.network_layer_mapping.get(key, None) + # kohya_ss OFT module elif sd_module is None and "oft_unet" in key_network_without_network_parts: key = key_network_without_network_parts.replace("oft_unet", "diffusion_model") sd_module = shared.sd_model.network_layer_mapping.get(key, None) + # KohakuBlueLeaf OFT module + if sd_module is None and "oft_diag" in key: + key = key_network_without_network_parts.replace("lora_unet", "diffusion_model") + key = key_network_without_network_parts.replace("lora_te1_text_model", "0_transformer_text_model") + sd_module = shared.sd_model.network_layer_mapping.get(key, None) + if sd_module is None: keys_failed_to_match[key_network] = key continue -- cgit v1.2.3 From d727ddfccdc6d474767be9dc3bf504150e81a8a5 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Thu, 2 Nov 2023 00:13:11 -0700 Subject: no idea what i'm doing, trying to support both type of OFT, kblueleaf diag_oft has MultiheadAttn which kohya's doesn't?, attempt create new module based off network_lora.py, errors about tensor dim mismatch --- extensions-builtin/Lora/network_oft.py | 192 +++++++++++++++++++++++++-------- 1 file changed, 145 insertions(+), 47 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index ff61b369..e102eafc 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -1,11 +1,12 @@ import torch import network from einops import rearrange +from modules import devices class ModuleTypeOFT(network.ModuleType): def create_module(self, net: network.Network, weights: network.NetworkWeights): - if all(x in weights.w for x in ["oft_blocks"]): + if all(x in weights.w for x in ["oft_blocks"]) or all(x in weights.w for x in ["oft_diag"]): return NetworkModuleOFT(net, weights) return None @@ -16,66 +17,117 @@ class NetworkModuleOFT(network.NetworkModule): super().__init__(net, weights) - self.oft_blocks = weights.w["oft_blocks"] - self.alpha = weights.w["alpha"] - self.dim = self.oft_blocks.shape[0] - self.num_blocks = self.dim - - if "Linear" in self.sd_module.__class__.__name__: + self.lin_module = None + # kohya-ss + if "oft_blocks" in weights.w.keys(): + self.is_kohya = True + self.oft_blocks = weights.w["oft_blocks"] + self.alpha = weights.w["alpha"] + self.dim = self.oft_blocks.shape[0] + elif "oft_diag" in weights.w.keys(): + self.is_kohya = False + self.oft_blocks = weights.w["oft_diag"] + # alpha is rank if alpha is 0 or None + if self.alpha is None: + pass + self.dim = 
self.oft_blocks.shape[0] # FIXME: almost certainly incorrect, assumes tensor is shape [*, m, n] + else: + raise ValueError("oft_blocks or oft_diag must be in weights dict") + + is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear] + is_conv = type(self.sd_module) in [torch.nn.Conv2d] + is_other_linear = type(self.sd_module) in [ torch.nn.MultiheadAttention] + #if "Linear" in self.sd_module.__class__.__name__ or is_linear: + if is_linear: self.out_dim = self.sd_module.out_features - elif "Conv" in self.sd_module.__class__.__name__: + #elif hasattr(self.sd_module, "embed_dim"): + # self.out_dim = self.sd_module.embed_dim + #else: + # raise ValueError("Linear sd_module must have out_features or embed_dim") + elif is_other_linear: + self.out_dim = self.sd_module.embed_dim + elif is_conv: self.out_dim = self.sd_module.out_channels + else: + raise ValueError("sd_module must be Linear or Conv") + - self.constraint = self.alpha * self.out_dim - self.block_size = self.out_dim // self.num_blocks + if self.is_kohya: + self.num_blocks = self.dim + self.block_size = self.out_dim // self.num_blocks + self.constraint = self.alpha * self.out_dim + #elif is_linear or is_conv: + else: + self.num_blocks, self.block_size = factorization(self.out_dim, self.dim) + self.constraint = None self.org_module: list[torch.Module] = [self.sd_module] - # def merge_weight(self, R_weight, org_weight): - # R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) - # if org_weight.dim() == 4: - # weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) - # else: - # weight = torch.einsum("oi, op -> pi", org_weight, R_weight) - # weight = torch.einsum( - # "k n m, k n ... -> k m ...", - # self.oft_diag * scale + torch.eye(self.block_size, device=device), - # org_weight - # ) - # return weight + # if is_other_linear: + # weight = self.oft_blocks.reshape(self.oft_blocks.shape[0], -1) + # module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False) + # with torch.no_grad(): + # if weight.shape != module.weight.shape: + # weight = weight.reshape(module.weight.shape) + # module.weight.copy_(weight) + # module.to(device=devices.cpu, dtype=devices.dtype) + # module.weight.requires_grad_(False) + # self.lin_module = module + #return module + + def merge_weight(self, R_weight, org_weight): + R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) + if org_weight.dim() == 4: + weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) + else: + weight = torch.einsum("oi, op -> pi", org_weight, R_weight) + #weight = torch.einsum( + # "k n m, k n ... 
-> k m ...", + # self.oft_diag * scale + torch.eye(self.block_size, device=device), + # org_weight + #) + return weight def get_weight(self, oft_blocks, multiplier=None): - # constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) + if self.constraint is not None: + constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) - # block_Q = oft_blocks - oft_blocks.transpose(1, 2) - # norm_Q = torch.norm(block_Q.flatten()) - # new_norm_Q = torch.clamp(norm_Q, max=constraint) - # block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - # m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) - # block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) + block_Q = oft_blocks - oft_blocks.transpose(1, 2) + norm_Q = torch.norm(block_Q.flatten()) + if self.constraint is not None: + new_norm_Q = torch.clamp(norm_Q, max=constraint) + else: + new_norm_Q = norm_Q + block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) + m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) - # block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I - # R = torch.block_diag(*block_R_weighted) - #return R - return self.oft_blocks + block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I + R = torch.block_diag(*block_R_weighted) + return R + #return self.oft_blocks def calc_updown(self, orig_weight): multiplier = self.multiplier() * self.calc_scale() - #R = self.get_weight(self.oft_blocks, multiplier) - R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - #merged_weight = self.merge_weight(R, orig_weight) - - orig_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) - weight = torch.einsum( - 'k n m, k n ... -> k m ...', - R * multiplier + torch.eye(self.block_size, device=orig_weight.device), - orig_weight - ) - weight = rearrange(weight, 'k m ... -> (k m) ...') - - #updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight - updown = weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + R = self.get_weight(self.oft_blocks, multiplier) + #R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + merged_weight = self.merge_weight(R, orig_weight) + + #if self.lin_module is not None: + # R = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype) + # weight = torch.mul(torch.mul(R, multiplier), orig_weight) + #else: + # orig_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) + # weight = torch.einsum( + # 'k n m, k n ... -> k m ...', + # R * multiplier + torch.eye(self.block_size, device=orig_weight.device), + # orig_weight + # ) + # weight = rearrange(weight, 'k m ... 
-> (k m) ...') + + updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + #updown = weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape orig_weight = orig_weight @@ -100,3 +152,49 @@ class NetworkModuleOFT(network.NetworkModule): ex_bias = ex_bias * self.multiplier() return updown, ex_bias + +# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py +def factorization(dimension: int, factor:int=-1) -> tuple[int, int]: + ''' + return a tuple of two value of input dimension decomposed by the number closest to factor + second value is higher or equal than first value. + + In LoRA with Kroneckor Product, first value is a value for weight scale. + secon value is a value for weight. + + Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + + examples) + factor + -1 2 4 8 16 ... + 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 + 128 -> 8, 16 128 -> 2, 64 128 -> 4, 32 128 -> 8, 16 128 -> 8, 16 + 250 -> 10, 25 250 -> 2, 125 250 -> 2, 125 250 -> 5, 50 250 -> 10, 25 + 360 -> 8, 45 360 -> 2, 180 360 -> 4, 90 360 -> 8, 45 360 -> 12, 30 + 512 -> 16, 32 512 -> 2, 256 512 -> 4, 128 512 -> 8, 64 512 -> 16, 32 + 1024 -> 32, 32 1024 -> 2, 512 1024 -> 4, 256 1024 -> 8, 128 1024 -> 16, 64 + ''' + + if factor > 0 and (dimension % factor) == 0: + m = factor + n = dimension // factor + if m > n: + n, m = m, n + return m, n + if factor < 0: + factor = dimension + m, n = 1, dimension + length = m + n + while m length or new_m>factor: + break + else: + m, n = new_m, new_n + if m > n: + n, m = m, n + return m, n + -- cgit v1.2.3 From fe1967a4c4a02eccfa45b65ee19a5b0773ced31c Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:52:55 -0700 Subject: skip multihead attn for now --- extensions-builtin/Lora/network_oft.py | 54 +++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e102eafc..979a2047 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -18,6 +18,7 @@ class NetworkModuleOFT(network.NetworkModule): super().__init__(net, weights) self.lin_module = None + self.org_module: list[torch.Module] = [self.sd_module] # kohya-ss if "oft_blocks" in weights.w.keys(): self.is_kohya = True @@ -30,7 +31,7 @@ class NetworkModuleOFT(network.NetworkModule): # alpha is rank if alpha is 0 or None if self.alpha is None: pass - self.dim = self.oft_blocks.shape[0] # FIXME: almost certainly incorrect, assumes tensor is shape [*, m, n] + self.dim = self.oft_blocks.shape[1] # FIXME: almost certainly incorrect, assumes tensor is shape [*, m, n] else: raise ValueError("oft_blocks or oft_diag must be in weights dict") @@ -46,6 +47,12 @@ class NetworkModuleOFT(network.NetworkModule): # raise ValueError("Linear sd_module must have out_features or embed_dim") elif is_other_linear: self.out_dim = self.sd_module.embed_dim + #self.org_weight = self.org_module[0].weight +# if hasattr(self.sd_module, "in_proj_weight"): +# self.in_proj_dim = self.sd_module.in_proj_weight.shape[1] +# if hasattr(self.sd_module, "out_proj_weight"): +# self.out_proj_dim = self.sd_module.out_proj_weight.shape[0] +# self.in_proj_dim = self.sd_module.in_proj_weight.shape[1] elif is_conv: self.out_dim = self.sd_module.out_channels else: @@ 
-58,10 +65,9 @@ class NetworkModuleOFT(network.NetworkModule): self.constraint = self.alpha * self.out_dim #elif is_linear or is_conv: else: - self.num_blocks, self.block_size = factorization(self.out_dim, self.dim) + self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) self.constraint = None - self.org_module: list[torch.Module] = [self.sd_module] # if is_other_linear: # weight = self.oft_blocks.reshape(self.oft_blocks.shape[0], -1) @@ -110,25 +116,39 @@ class NetworkModuleOFT(network.NetworkModule): def calc_updown(self, orig_weight): multiplier = self.multiplier() * self.calc_scale() - R = self.get_weight(self.oft_blocks, multiplier) - #R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - merged_weight = self.merge_weight(R, orig_weight) + is_other_linear = type(self.sd_module) in [ torch.nn.MultiheadAttention] + if self.is_kohya and not is_other_linear: + R = self.get_weight(self.oft_blocks, multiplier) + #R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + merged_weight = self.merge_weight(R, orig_weight) + elif not self.is_kohya and not is_other_linear: + if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: + orig_weight=orig_weight.permute(1, 0) + R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) + #orig_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.block_size, n=self.num_blocks) + merged_weight = torch.einsum( + 'k n m, k n ... -> k m ...', + R * multiplier + torch.eye(self.block_size, device=orig_weight.device), + merged_weight + ) + merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...') + if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: + orig_weight=orig_weight.permute(1, 0) + #merged_weight=merged_weight.permute(1, 0) + updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + #updown = weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + output_shape = orig_weight.shape + else: + # skip for now + updown = torch.zeros([orig_weight.shape[1], orig_weight.shape[1]], device=orig_weight.device, dtype=orig_weight.dtype) + output_shape = (orig_weight.shape[1], orig_weight.shape[1]) #if self.lin_module is not None: # R = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype) # weight = torch.mul(torch.mul(R, multiplier), orig_weight) #else: - # orig_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) - # weight = torch.einsum( - # 'k n m, k n ... -> k m ...', - # R * multiplier + torch.eye(self.block_size, device=orig_weight.device), - # orig_weight - # ) - # weight = rearrange(weight, 'k m ... 
-> (k m) ...') - - updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight - #updown = weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight - output_shape = orig_weight.shape + orig_weight = orig_weight return self.finalize_updown(updown, orig_weight, output_shape) -- cgit v1.2.3 From f6c8201e5663ca2182a66c8eca63ce4801d52849 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 3 Nov 2023 19:35:15 -0700 Subject: refactor: move factorization to lyco_helpers, separate calc_updown for kohya and kb --- extensions-builtin/Lora/lyco_helpers.py | 47 ++++++++++++ extensions-builtin/Lora/network_oft.py | 131 ++++++++------------------------ 2 files changed, 77 insertions(+), 101 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/lyco_helpers.py b/extensions-builtin/Lora/lyco_helpers.py index 279b34bc..1679a0ce 100644 --- a/extensions-builtin/Lora/lyco_helpers.py +++ b/extensions-builtin/Lora/lyco_helpers.py @@ -19,3 +19,50 @@ def rebuild_cp_decomposition(up, down, mid): up = up.reshape(up.size(0), -1) down = down.reshape(down.size(0), -1) return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down) + + +# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py +def factorization(dimension: int, factor:int=-1) -> tuple[int, int]: + ''' + return a tuple of two value of input dimension decomposed by the number closest to factor + second value is higher or equal than first value. + + In LoRA with Kroneckor Product, first value is a value for weight scale. + secon value is a value for weight. + + Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + + examples) + factor + -1 2 4 8 16 ... 
+ 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 + 128 -> 8, 16 128 -> 2, 64 128 -> 4, 32 128 -> 8, 16 128 -> 8, 16 + 250 -> 10, 25 250 -> 2, 125 250 -> 2, 125 250 -> 5, 50 250 -> 10, 25 + 360 -> 8, 45 360 -> 2, 180 360 -> 4, 90 360 -> 8, 45 360 -> 12, 30 + 512 -> 16, 32 512 -> 2, 256 512 -> 4, 128 512 -> 8, 64 512 -> 16, 32 + 1024 -> 32, 32 1024 -> 2, 512 1024 -> 4, 256 1024 -> 8, 128 1024 -> 16, 64 + ''' + + if factor > 0 and (dimension % factor) == 0: + m = factor + n = dimension // factor + if m > n: + n, m = m, n + return m, n + if factor < 0: + factor = dimension + m, n = 1, dimension + length = m + n + while m length or new_m>factor: + break + else: + m, n = new_m, new_n + if m > n: + n, m = m, n + return m, n + diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 979a2047..2be67fe5 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -1,7 +1,7 @@ import torch import network +from lyco_helpers import factorization from einops import rearrange -from modules import devices class ModuleTypeOFT(network.ModuleType): @@ -11,7 +11,8 @@ class ModuleTypeOFT(network.ModuleType): return None -# adapted from kohya's implementation https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py +# adapted from kohya-ss' implementation https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py +# and KohakuBlueleaf's implementation https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py class NetworkModuleOFT(network.NetworkModule): def __init__(self, net: network.Network, weights: network.NetworkWeights): @@ -19,6 +20,7 @@ class NetworkModuleOFT(network.NetworkModule): self.lin_module = None self.org_module: list[torch.Module] = [self.sd_module] + # kohya-ss if "oft_blocks" in weights.w.keys(): self.is_kohya = True @@ -37,61 +39,31 @@ class NetworkModuleOFT(network.NetworkModule): is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear] is_conv = type(self.sd_module) in [torch.nn.Conv2d] - is_other_linear = type(self.sd_module) in [ torch.nn.MultiheadAttention] - #if "Linear" in self.sd_module.__class__.__name__ or is_linear: + is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] + if is_linear: self.out_dim = self.sd_module.out_features - #elif hasattr(self.sd_module, "embed_dim"): - # self.out_dim = self.sd_module.embed_dim - #else: - # raise ValueError("Linear sd_module must have out_features or embed_dim") elif is_other_linear: self.out_dim = self.sd_module.embed_dim - #self.org_weight = self.org_module[0].weight -# if hasattr(self.sd_module, "in_proj_weight"): -# self.in_proj_dim = self.sd_module.in_proj_weight.shape[1] -# if hasattr(self.sd_module, "out_proj_weight"): -# self.out_proj_dim = self.sd_module.out_proj_weight.shape[0] -# self.in_proj_dim = self.sd_module.in_proj_weight.shape[1] elif is_conv: self.out_dim = self.sd_module.out_channels else: raise ValueError("sd_module must be Linear or Conv") - if self.is_kohya: self.num_blocks = self.dim self.block_size = self.out_dim // self.num_blocks self.constraint = self.alpha * self.out_dim - #elif is_linear or is_conv: else: self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) self.constraint = None - - # if is_other_linear: - # weight = self.oft_blocks.reshape(self.oft_blocks.shape[0], -1) - # module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False) - # with torch.no_grad(): - # if 
weight.shape != module.weight.shape: - # weight = weight.reshape(module.weight.shape) - # module.weight.copy_(weight) - # module.to(device=devices.cpu, dtype=devices.dtype) - # module.weight.requires_grad_(False) - # self.lin_module = module - #return module - def merge_weight(self, R_weight, org_weight): R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) if org_weight.dim() == 4: weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) else: weight = torch.einsum("oi, op -> pi", org_weight, R_weight) - #weight = torch.einsum( - # "k n m, k n ... -> k m ...", - # self.oft_diag * scale + torch.eye(self.block_size, device=device), - # org_weight - #) return weight def get_weight(self, oft_blocks, multiplier=None): @@ -111,48 +83,51 @@ class NetworkModuleOFT(network.NetworkModule): block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I R = torch.block_diag(*block_R_weighted) return R - #return self.oft_blocks + def calc_updown_kohya(self, orig_weight, multiplier): + R = self.get_weight(self.oft_blocks, multiplier) + merged_weight = self.merge_weight(R, orig_weight) - def calc_updown(self, orig_weight): - multiplier = self.multiplier() * self.calc_scale() - is_other_linear = type(self.sd_module) in [ torch.nn.MultiheadAttention] - if self.is_kohya and not is_other_linear: - R = self.get_weight(self.oft_blocks, multiplier) - #R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - merged_weight = self.merge_weight(R, orig_weight) - elif not self.is_kohya and not is_other_linear: + updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + output_shape = orig_weight.shape + orig_weight = orig_weight + return self.finalize_updown(updown, orig_weight, output_shape) + + def calc_updown_kb(self, orig_weight, multiplier): + is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] + + if not is_other_linear: if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: orig_weight=orig_weight.permute(1, 0) + R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) - #orig_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.block_size, n=self.num_blocks) merged_weight = torch.einsum( 'k n m, k n ... -> k m ...', R * multiplier + torch.eye(self.block_size, device=orig_weight.device), - merged_weight + merged_weight ) merged_weight = rearrange(merged_weight, 'k m ... 
-> (k m) ...') + if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: orig_weight=orig_weight.permute(1, 0) - #merged_weight=merged_weight.permute(1, 0) + updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight - #updown = weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape else: - # skip for now + # FIXME: skip MultiheadAttention for now updown = torch.zeros([orig_weight.shape[1], orig_weight.shape[1]], device=orig_weight.device, dtype=orig_weight.dtype) output_shape = (orig_weight.shape[1], orig_weight.shape[1]) - #if self.lin_module is not None: - # R = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype) - # weight = torch.mul(torch.mul(R, multiplier), orig_weight) - #else: - - orig_weight = orig_weight - return self.finalize_updown(updown, orig_weight, output_shape) + def calc_updown(self, orig_weight): + multiplier = self.multiplier() * self.calc_scale() + if self.is_kohya: + return self.calc_updown_kohya(orig_weight, multiplier) + else: + return self.calc_updown_kb(orig_weight, multiplier) + # override to remove the multiplier/scale factor; it's already multiplied in get_weight def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): #return super().finalize_updown(updown, orig_weight, output_shape, ex_bias) @@ -172,49 +147,3 @@ class NetworkModuleOFT(network.NetworkModule): ex_bias = ex_bias * self.multiplier() return updown, ex_bias - -# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py -def factorization(dimension: int, factor:int=-1) -> tuple[int, int]: - ''' - return a tuple of two value of input dimension decomposed by the number closest to factor - second value is higher or equal than first value. - - In LoRA with Kroneckor Product, first value is a value for weight scale. - secon value is a value for weight. - - Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. - - examples) - factor - -1 2 4 8 16 ... 
- 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 - 128 -> 8, 16 128 -> 2, 64 128 -> 4, 32 128 -> 8, 16 128 -> 8, 16 - 250 -> 10, 25 250 -> 2, 125 250 -> 2, 125 250 -> 5, 50 250 -> 10, 25 - 360 -> 8, 45 360 -> 2, 180 360 -> 4, 90 360 -> 8, 45 360 -> 12, 30 - 512 -> 16, 32 512 -> 2, 256 512 -> 4, 128 512 -> 8, 64 512 -> 16, 32 - 1024 -> 32, 32 1024 -> 2, 512 1024 -> 4, 256 1024 -> 8, 128 1024 -> 16, 64 - ''' - - if factor > 0 and (dimension % factor) == 0: - m = factor - n = dimension // factor - if m > n: - n, m = m, n - return m, n - if factor < 0: - factor = dimension - m, n = 1, dimension - length = m + n - while m length or new_m>factor: - break - else: - m, n = new_m, new_n - if m > n: - n, m = m, n - return m, n - -- cgit v1.2.3 From 329c8bacce706811776e1c1c6a0d39b46886a268 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 4 Nov 2023 14:54:36 -0700 Subject: refactor: use same updown for both kohya OFT and LyCORIS diag-oft --- extensions-builtin/Lora/network_oft.py | 91 +++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 17 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 2be67fe5..e4aa082b 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -2,6 +2,7 @@ import torch import network from lyco_helpers import factorization from einops import rearrange +from modules import devices class ModuleTypeOFT(network.ModuleType): @@ -24,12 +25,14 @@ class NetworkModuleOFT(network.NetworkModule): # kohya-ss if "oft_blocks" in weights.w.keys(): self.is_kohya = True - self.oft_blocks = weights.w["oft_blocks"] + self.oft_blocks = weights.w["oft_blocks"] # (num_blocks, block_size, block_size) self.alpha = weights.w["alpha"] - self.dim = self.oft_blocks.shape[0] + self.dim = self.oft_blocks.shape[0] # lora dim + #self.oft_blocks = rearrange(self.oft_blocks, 'k m ... 
-> (k m) ...') elif "oft_diag" in weights.w.keys(): self.is_kohya = False - self.oft_blocks = weights.w["oft_diag"] + self.oft_blocks = weights.w["oft_diag"] # (num_blocks, block_size, block_size) + # alpha is rank if alpha is 0 or None if self.alpha is None: pass @@ -51,12 +54,57 @@ class NetworkModuleOFT(network.NetworkModule): raise ValueError("sd_module must be Linear or Conv") if self.is_kohya: - self.num_blocks = self.dim - self.block_size = self.out_dim // self.num_blocks + #self.num_blocks = self.dim + #self.block_size = self.out_dim // self.num_blocks + #self.block_size = self.dim + #self.num_blocks = self.out_dim // self.block_size self.constraint = self.alpha * self.out_dim + self.num_blocks, self.block_size = factorization(self.out_dim, self.dim) else: - self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) self.constraint = None + self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) + + if is_other_linear: + self.lin_module = self.create_module(weights.w, "oft_diag", none_ok=True) + + + def create_module(self, weights, key, none_ok=False): + weight = weights.get(key) + + if weight is None and none_ok: + return None + + is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention] + is_conv = type(self.sd_module) in [torch.nn.Conv2d] + + if is_linear: + weight = weight.reshape(weight.shape[0], -1) + module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False) + elif is_conv and key == "lora_down.weight" or key == "dyn_up": + if len(weight.shape) == 2: + weight = weight.reshape(weight.shape[0], -1, 1, 1) + + if weight.shape[2] != 1 or weight.shape[3] != 1: + module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False) + else: + module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False) + elif is_conv and key == "lora_mid.weight": + module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False) + elif is_conv and key == "lora_up.weight" or key == "dyn_down": + module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False) + else: + raise AssertionError(f'Lora layer {self.network_key} matched a layer with unsupported type: {type(self.sd_module).__name__}') + + with torch.no_grad(): + if weight.shape != module.weight.shape: + weight = weight.reshape(module.weight.shape) + module.weight.copy_(weight) + + module.to(device=devices.cpu, dtype=devices.dtype) + module.weight.requires_grad_(False) + + return module + def merge_weight(self, R_weight, org_weight): R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) @@ -77,7 +125,8 @@ class NetworkModuleOFT(network.NetworkModule): else: new_norm_Q = norm_Q block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) + m_I = torch.eye(self.num_blocks, device=oft_blocks.device).unsqueeze(0).repeat(self.block_size, 1, 1) + #m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I @@ -97,25 +146,33 @@ class NetworkModuleOFT(network.NetworkModule): is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] if not 
is_other_linear: - if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: - orig_weight=orig_weight.permute(1, 0) + #if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: + # orig_weight=orig_weight.permute(1, 0) + + oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + + # without this line the results are significantly worse / less accurate + oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) + + R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device) - R = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) merged_weight = torch.einsum( 'k n m, k n ... -> k m ...', - R * multiplier + torch.eye(self.block_size, device=orig_weight.device), + R, merged_weight ) merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...') - if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: - orig_weight=orig_weight.permute(1, 0) + #if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: + # orig_weight=orig_weight.permute(1, 0) updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape else: # FIXME: skip MultiheadAttention for now + #up = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype) updown = torch.zeros([orig_weight.shape[1], orig_weight.shape[1]], device=orig_weight.device, dtype=orig_weight.dtype) output_shape = (orig_weight.shape[1], orig_weight.shape[1]) @@ -123,10 +180,10 @@ class NetworkModuleOFT(network.NetworkModule): def calc_updown(self, orig_weight): multiplier = self.multiplier() * self.calc_scale() - if self.is_kohya: - return self.calc_updown_kohya(orig_weight, multiplier) - else: - return self.calc_updown_kb(orig_weight, multiplier) + #if self.is_kohya: + # return self.calc_updown_kohya(orig_weight, multiplier) + #else: + return self.calc_updown_kb(orig_weight, multiplier) # override to remove the multiplier/scale factor; it's already multiplied in get_weight def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): -- cgit v1.2.3 From bbf00a96afb2215f13cc72a7908225ae300c423d Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sat, 4 Nov 2023 14:56:47 -0700 Subject: refactor: remove unused function --- extensions-builtin/Lora/network_oft.py | 47 ---------------------------------- 1 file changed, 47 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e4aa082b..93402bb2 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -2,7 +2,6 @@ import torch import network from lyco_helpers import factorization from einops import rearrange -from modules import devices class ModuleTypeOFT(network.ModuleType): @@ -54,58 +53,12 @@ class NetworkModuleOFT(network.NetworkModule): raise ValueError("sd_module must be Linear or Conv") if self.is_kohya: - #self.num_blocks = self.dim - #self.block_size = self.out_dim // self.num_blocks - #self.block_size = self.dim - #self.num_blocks = self.out_dim // self.block_size self.constraint = self.alpha * self.out_dim self.num_blocks, self.block_size = factorization(self.out_dim, self.dim) else: self.constraint = None self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) - if is_other_linear: - 
self.lin_module = self.create_module(weights.w, "oft_diag", none_ok=True) - - - def create_module(self, weights, key, none_ok=False): - weight = weights.get(key) - - if weight is None and none_ok: - return None - - is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention] - is_conv = type(self.sd_module) in [torch.nn.Conv2d] - - if is_linear: - weight = weight.reshape(weight.shape[0], -1) - module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False) - elif is_conv and key == "lora_down.weight" or key == "dyn_up": - if len(weight.shape) == 2: - weight = weight.reshape(weight.shape[0], -1, 1, 1) - - if weight.shape[2] != 1 or weight.shape[3] != 1: - module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False) - else: - module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False) - elif is_conv and key == "lora_mid.weight": - module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False) - elif is_conv and key == "lora_up.weight" or key == "dyn_down": - module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False) - else: - raise AssertionError(f'Lora layer {self.network_key} matched a layer with unsupported type: {type(self.sd_module).__name__}') - - with torch.no_grad(): - if weight.shape != module.weight.shape: - weight = weight.reshape(module.weight.shape) - module.weight.copy_(weight) - - module.to(device=devices.cpu, dtype=devices.dtype) - module.weight.requires_grad_(False) - - return module - - def merge_weight(self, R_weight, org_weight): R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) if org_weight.dim() == 4: -- cgit v1.2.3 From 4d4a9e733219f8c065a4ab6c5ab42836db7330fe Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 5 Nov 2023 19:19:55 +0300 Subject: added compact prompt option --- extensions-builtin/mobile/javascript/mobile.js | 2 ++ 1 file changed, 2 insertions(+) (limited to 'extensions-builtin') diff --git a/extensions-builtin/mobile/javascript/mobile.js b/extensions-builtin/mobile/javascript/mobile.js index 652f07ac..bff1aced 100644 --- a/extensions-builtin/mobile/javascript/mobile.js +++ b/extensions-builtin/mobile/javascript/mobile.js @@ -12,6 +12,8 @@ function isMobile() { } function reportWindowSize() { + if (gradioApp().querySelector('.toprow-compact-tools')) return; // not applicable for compact prompt layout + var currentlyMobile = isMobile(); if (currentlyMobile == isSetupForMobile) return; isSetupForMobile = currentlyMobile; -- cgit v1.2.3 From d6d0b22e6657fc84039e82ee735a57101bfe7c17 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Wed, 15 Nov 2023 03:08:50 -0800 Subject: fix: ignore calc_scale() for COFT which has very small alpha --- extensions-builtin/Lora/network_oft.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 93402bb2..c45a8d23 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -99,12 +99,9 @@ class NetworkModuleOFT(network.NetworkModule): is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] if not is_other_linear: - #if is_other_linear and orig_weight.shape[0] 
!= orig_weight.shape[1]: - # orig_weight=orig_weight.permute(1, 0) - oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - # without this line the results are significantly worse / less accurate + # ensure skew-symmetric matrix oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) @@ -118,9 +115,6 @@ class NetworkModuleOFT(network.NetworkModule): ) merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...') - #if is_other_linear and orig_weight.shape[0] != orig_weight.shape[1]: - # orig_weight=orig_weight.permute(1, 0) - updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape else: @@ -132,10 +126,10 @@ class NetworkModuleOFT(network.NetworkModule): return self.finalize_updown(updown, orig_weight, output_shape) def calc_updown(self, orig_weight): - multiplier = self.multiplier() * self.calc_scale() - #if self.is_kohya: - # return self.calc_updown_kohya(orig_weight, multiplier) - #else: + # if alpha is a very small number as in coft, calc_scale will return a almost zero number so we ignore it + #multiplier = self.multiplier() * self.calc_scale() + multiplier = self.multiplier() + return self.calc_updown_kb(orig_weight, multiplier) # override to remove the multiplier/scale factor; it's already multiplied in get_weight -- cgit v1.2.3 From eb667e715ad3eea981f6263c143ab0422e5340c9 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Wed, 15 Nov 2023 18:28:48 -0800 Subject: feat: LyCORIS/kohya OFT network support --- extensions-builtin/Lora/network_oft.py | 108 ++++++++------------------------- 1 file changed, 26 insertions(+), 82 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index c45a8d23..05c37811 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -11,8 +11,8 @@ class ModuleTypeOFT(network.ModuleType): return None -# adapted from kohya-ss' implementation https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py -# and KohakuBlueleaf's implementation https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py +# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py +# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py class NetworkModuleOFT(network.NetworkModule): def __init__(self, net: network.Network, weights: network.NetworkWeights): @@ -25,117 +25,61 @@ class NetworkModuleOFT(network.NetworkModule): if "oft_blocks" in weights.w.keys(): self.is_kohya = True self.oft_blocks = weights.w["oft_blocks"] # (num_blocks, block_size, block_size) - self.alpha = weights.w["alpha"] + self.alpha = weights.w["alpha"] # alpha is constraint self.dim = self.oft_blocks.shape[0] # lora dim - #self.oft_blocks = rearrange(self.oft_blocks, 'k m ... 
-> (k m) ...') + # LyCORIS elif "oft_diag" in weights.w.keys(): self.is_kohya = False - self.oft_blocks = weights.w["oft_diag"] # (num_blocks, block_size, block_size) - - # alpha is rank if alpha is 0 or None - if self.alpha is None: - pass - self.dim = self.oft_blocks.shape[1] # FIXME: almost certainly incorrect, assumes tensor is shape [*, m, n] - else: - raise ValueError("oft_blocks or oft_diag must be in weights dict") + self.oft_blocks = weights.w["oft_diag"] + # self.alpha is unused + self.dim = self.oft_blocks.shape[1] # (num_blocks, block_size, block_size) is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear] is_conv = type(self.sd_module) in [torch.nn.Conv2d] - is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] + is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] # unsupported if is_linear: self.out_dim = self.sd_module.out_features - elif is_other_linear: - self.out_dim = self.sd_module.embed_dim elif is_conv: self.out_dim = self.sd_module.out_channels - else: - raise ValueError("sd_module must be Linear or Conv") + elif is_other_linear: + self.out_dim = self.sd_module.embed_dim if self.is_kohya: self.constraint = self.alpha * self.out_dim - self.num_blocks, self.block_size = factorization(self.out_dim, self.dim) + self.num_blocks = self.dim + self.block_size = self.out_dim // self.dim else: self.constraint = None self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) - def merge_weight(self, R_weight, org_weight): - R_weight = R_weight.to(org_weight.device, dtype=org_weight.dtype) - if org_weight.dim() == 4: - weight = torch.einsum("oihw, op -> pihw", org_weight, R_weight) - else: - weight = torch.einsum("oi, op -> pi", org_weight, R_weight) - return weight - - def get_weight(self, oft_blocks, multiplier=None): - if self.constraint is not None: - constraint = self.constraint.to(oft_blocks.device, dtype=oft_blocks.dtype) - - block_Q = oft_blocks - oft_blocks.transpose(1, 2) - norm_Q = torch.norm(block_Q.flatten()) - if self.constraint is not None: - new_norm_Q = torch.clamp(norm_Q, max=constraint) - else: - new_norm_Q = norm_Q - block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - m_I = torch.eye(self.num_blocks, device=oft_blocks.device).unsqueeze(0).repeat(self.block_size, 1, 1) - #m_I = torch.eye(self.block_size, device=oft_blocks.device).unsqueeze(0).repeat(self.num_blocks, 1, 1) - block_R = torch.matmul(m_I + block_Q, (m_I - block_Q).inverse()) + def calc_updown_kb(self, orig_weight, multiplier): + oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix - block_R_weighted = multiplier * block_R + (1 - multiplier) * m_I - R = torch.block_diag(*block_R_weighted) - return R + R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device) - def calc_updown_kohya(self, orig_weight, multiplier): - R = self.get_weight(self.oft_blocks, multiplier) - merged_weight = self.merge_weight(R, orig_weight) + # This errors out for MultiheadAttention, might need to be handled up-stream + merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) + merged_weight = torch.einsum( + 'k n m, k n ... -> k m ...', + R, + merged_weight + ) + merged_weight = rearrange(merged_weight, 'k m ... 
-> (k m) ...') updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight output_shape = orig_weight.shape - orig_weight = orig_weight - return self.finalize_updown(updown, orig_weight, output_shape) - - def calc_updown_kb(self, orig_weight, multiplier): - is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] - - if not is_other_linear: - oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - - # ensure skew-symmetric matrix - oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) - - R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device) - - merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) - merged_weight = torch.einsum( - 'k n m, k n ... -> k m ...', - R, - merged_weight - ) - merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...') - - updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight - output_shape = orig_weight.shape - else: - # FIXME: skip MultiheadAttention for now - #up = self.lin_module.weight.to(orig_weight.device, dtype=orig_weight.dtype) - updown = torch.zeros([orig_weight.shape[1], orig_weight.shape[1]], device=orig_weight.device, dtype=orig_weight.dtype) - output_shape = (orig_weight.shape[1], orig_weight.shape[1]) - return self.finalize_updown(updown, orig_weight, output_shape) def calc_updown(self, orig_weight): - # if alpha is a very small number as in coft, calc_scale will return a almost zero number so we ignore it - #multiplier = self.multiplier() * self.calc_scale() + # if alpha is a very small number as in coft, calc_scale() will return a almost zero number so we ignore it multiplier = self.multiplier() - return self.calc_updown_kb(orig_weight, multiplier) # override to remove the multiplier/scale factor; it's already multiplied in get_weight def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): - #return super().finalize_updown(updown, orig_weight, output_shape, ex_bias) - if self.bias is not None: updown = updown.reshape(self.bias.shape) updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype) -- cgit v1.2.3 From 370a77f8e78e65a8a1339289d684cb43df142f70 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Tue, 21 Nov 2023 19:59:34 +0800 Subject: Option for using fp16 weight when apply lora --- extensions-builtin/Lora/networks.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 0170dbfb..d22ed843 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -388,18 +388,26 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn if module is not None and hasattr(self, 'weight'): try: with torch.no_grad(): - updown, ex_bias = module.calc_updown(self.weight) + if getattr(self, 'fp16_weight', None) is None: + weight = self.weight + bias = self.bias + else: + weight = self.fp16_weight.clone().to(self.weight.device) + bias = getattr(self, 'fp16_bias', None) + if bias is not None: + bias = bias.clone().to(self.bias.device) + updown, ex_bias = module.calc_updown(weight) - if len(self.weight.shape) == 4 and self.weight.shape[1] == 9: + if len(weight.shape) == 4 and weight.shape[1] == 9: # inpainting model. 
zero pad updown to make channel[1] 4 to 9 updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) - self.weight.copy_((self.weight.to(dtype=updown.dtype) + updown).to(dtype=self.weight.dtype)) + self.weight.copy_((weight.to(dtype=updown.dtype) + updown).to(dtype=self.weight.dtype)) if ex_bias is not None and hasattr(self, 'bias'): if self.bias is None: self.bias = torch.nn.Parameter(ex_bias).to(self.weight.dtype) else: - self.bias.copy_((self.bias.to(dtype=ex_bias.dtype) + ex_bias).to(dtype=self.bias.dtype)) + self.bias.copy_((bias + ex_bias).to(dtype=self.bias.dtype)) except RuntimeError as e: logging.debug(f"Network {net.name} layer {network_layer_name}: {e}") extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1 -- cgit v1.2.3 From 3a9bf4ac10d99feb81b0e637417a108d3fa5ac06 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 26 Nov 2023 08:29:12 +0300 Subject: move file --- extensions-builtin/hypertile/hypertile.py | 371 ++++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 extensions-builtin/hypertile/hypertile.py (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/hypertile.py b/extensions-builtin/hypertile/hypertile.py new file mode 100644 index 00000000..be898fce --- /dev/null +++ b/extensions-builtin/hypertile/hypertile.py @@ -0,0 +1,371 @@ +""" +Hypertile module for splitting attention layers in SD-1.5 U-Net and SD-1.5 VAE +Warn : The patch works well only if the input image has a width and height that are multiples of 128 +Author : @tfernd Github : https://github.com/tfernd/HyperTile +""" + +from __future__ import annotations +from typing import Callable +from typing_extensions import Literal + +import logging +from functools import wraps, cache +from contextlib import contextmanager + +import math +import torch.nn as nn +import random + +from einops import rearrange + +# TODO add SD-XL layers +DEPTH_LAYERS = { + 0: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.0.attentions.0.transformer_blocks.0.attn1", + "down_blocks.0.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.0.transformer_blocks.0.attn1", + "up_blocks.3.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.1.1.transformer_blocks.0.attn1", + "input_blocks.2.1.transformer_blocks.0.attn1", + "output_blocks.9.1.transformer_blocks.0.attn1", + "output_blocks.10.1.transformer_blocks.0.attn1", + "output_blocks.11.1.transformer_blocks.0.attn1", + # SD 1.5 VAE + "decoder.mid_block.attentions.0", + "decoder.mid.attn_1", + ], + 1: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.1.attentions.0.transformer_blocks.0.attn1", + "down_blocks.1.attentions.1.transformer_blocks.0.attn1", + "up_blocks.2.attentions.0.transformer_blocks.0.attn1", + "up_blocks.2.attentions.1.transformer_blocks.0.attn1", + "up_blocks.2.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.4.1.transformer_blocks.0.attn1", + "input_blocks.5.1.transformer_blocks.0.attn1", + "output_blocks.6.1.transformer_blocks.0.attn1", + "output_blocks.7.1.transformer_blocks.0.attn1", + "output_blocks.8.1.transformer_blocks.0.attn1", + ], + 2: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.2.attentions.0.transformer_blocks.0.attn1", + "down_blocks.2.attentions.1.transformer_blocks.0.attn1", + "up_blocks.1.attentions.0.transformer_blocks.0.attn1", + "up_blocks.1.attentions.1.transformer_blocks.0.attn1", + 
"up_blocks.1.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.7.1.transformer_blocks.0.attn1", + "input_blocks.8.1.transformer_blocks.0.attn1", + "output_blocks.3.1.transformer_blocks.0.attn1", + "output_blocks.4.1.transformer_blocks.0.attn1", + "output_blocks.5.1.transformer_blocks.0.attn1", + ], + 3: [ + # SD 1.5 U-Net (diffusers) + "mid_block.attentions.0.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "middle_block.1.transformer_blocks.0.attn1", + ], +} +# XL layers, thanks for GitHub@gel-crabs for the help +DEPTH_LAYERS_XL = { + 0: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.0.attentions.0.transformer_blocks.0.attn1", + "down_blocks.0.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.0.transformer_blocks.0.attn1", + "up_blocks.3.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.4.1.transformer_blocks.0.attn1", + "input_blocks.5.1.transformer_blocks.0.attn1", + "output_blocks.3.1.transformer_blocks.0.attn1", + "output_blocks.4.1.transformer_blocks.0.attn1", + "output_blocks.5.1.transformer_blocks.0.attn1", + # SD 1.5 VAE + "decoder.mid_block.attentions.0", + "decoder.mid.attn_1", + ], + 1: [ + # SD 1.5 U-Net (diffusers) + #"down_blocks.1.attentions.0.transformer_blocks.0.attn1", + #"down_blocks.1.attentions.1.transformer_blocks.0.attn1", + #"up_blocks.2.attentions.0.transformer_blocks.0.attn1", + #"up_blocks.2.attentions.1.transformer_blocks.0.attn1", + #"up_blocks.2.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.4.1.transformer_blocks.1.attn1", + "input_blocks.5.1.transformer_blocks.1.attn1", + "output_blocks.3.1.transformer_blocks.1.attn1", + "output_blocks.4.1.transformer_blocks.1.attn1", + "output_blocks.5.1.transformer_blocks.1.attn1", + "input_blocks.7.1.transformer_blocks.0.attn1", + "input_blocks.8.1.transformer_blocks.0.attn1", + "output_blocks.0.1.transformer_blocks.0.attn1", + "output_blocks.1.1.transformer_blocks.0.attn1", + "output_blocks.2.1.transformer_blocks.0.attn1", + "input_blocks.7.1.transformer_blocks.1.attn1", + "input_blocks.8.1.transformer_blocks.1.attn1", + "output_blocks.0.1.transformer_blocks.1.attn1", + "output_blocks.1.1.transformer_blocks.1.attn1", + "output_blocks.2.1.transformer_blocks.1.attn1", + "input_blocks.7.1.transformer_blocks.2.attn1", + "input_blocks.8.1.transformer_blocks.2.attn1", + "output_blocks.0.1.transformer_blocks.2.attn1", + "output_blocks.1.1.transformer_blocks.2.attn1", + "output_blocks.2.1.transformer_blocks.2.attn1", + "input_blocks.7.1.transformer_blocks.3.attn1", + "input_blocks.8.1.transformer_blocks.3.attn1", + "output_blocks.0.1.transformer_blocks.3.attn1", + "output_blocks.1.1.transformer_blocks.3.attn1", + "output_blocks.2.1.transformer_blocks.3.attn1", + "input_blocks.7.1.transformer_blocks.4.attn1", + "input_blocks.8.1.transformer_blocks.4.attn1", + "output_blocks.0.1.transformer_blocks.4.attn1", + "output_blocks.1.1.transformer_blocks.4.attn1", + "output_blocks.2.1.transformer_blocks.4.attn1", + "input_blocks.7.1.transformer_blocks.5.attn1", + "input_blocks.8.1.transformer_blocks.5.attn1", + "output_blocks.0.1.transformer_blocks.5.attn1", + "output_blocks.1.1.transformer_blocks.5.attn1", + "output_blocks.2.1.transformer_blocks.5.attn1", + "input_blocks.7.1.transformer_blocks.6.attn1", + "input_blocks.8.1.transformer_blocks.6.attn1", + "output_blocks.0.1.transformer_blocks.6.attn1", + "output_blocks.1.1.transformer_blocks.6.attn1", + 
"output_blocks.2.1.transformer_blocks.6.attn1", + "input_blocks.7.1.transformer_blocks.7.attn1", + "input_blocks.8.1.transformer_blocks.7.attn1", + "output_blocks.0.1.transformer_blocks.7.attn1", + "output_blocks.1.1.transformer_blocks.7.attn1", + "output_blocks.2.1.transformer_blocks.7.attn1", + "input_blocks.7.1.transformer_blocks.8.attn1", + "input_blocks.8.1.transformer_blocks.8.attn1", + "output_blocks.0.1.transformer_blocks.8.attn1", + "output_blocks.1.1.transformer_blocks.8.attn1", + "output_blocks.2.1.transformer_blocks.8.attn1", + "input_blocks.7.1.transformer_blocks.9.attn1", + "input_blocks.8.1.transformer_blocks.9.attn1", + "output_blocks.0.1.transformer_blocks.9.attn1", + "output_blocks.1.1.transformer_blocks.9.attn1", + "output_blocks.2.1.transformer_blocks.9.attn1", + ], + 2: [ + # SD 1.5 U-Net (diffusers) + "mid_block.attentions.0.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "middle_block.1.transformer_blocks.0.attn1", + "middle_block.1.transformer_blocks.1.attn1", + "middle_block.1.transformer_blocks.2.attn1", + "middle_block.1.transformer_blocks.3.attn1", + "middle_block.1.transformer_blocks.4.attn1", + "middle_block.1.transformer_blocks.5.attn1", + "middle_block.1.transformer_blocks.6.attn1", + "middle_block.1.transformer_blocks.7.attn1", + "middle_block.1.transformer_blocks.8.attn1", + "middle_block.1.transformer_blocks.9.attn1", + ], + 3 : [] # TODO - separate layers for SD-XL +} + + +RNG_INSTANCE = random.Random() + +def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: + """ + Returns a random divisor of value that + x * min_value <= value + if max_options is 1, the behavior is deterministic + """ + min_value = min(min_value, value) + + # All big divisors of value (inclusive) + divisors = [i for i in range(min_value, value + 1) if value % i == 0] # divisors in small -> big order + + ns = [value // i for i in divisors[:max_options]] # has at least 1 element # big -> small order + + idx = RNG_INSTANCE.randint(0, len(ns) - 1) + + return ns[idx] + +def set_hypertile_seed(seed: int) -> None: + RNG_INSTANCE.seed(seed) + +def largest_tile_size_available(width:int, height:int) -> int: + """ + Calculates the largest tile size available for a given width and height + Tile size is always a power of 2 + """ + gcd = math.gcd(width, height) + largest_tile_size_available = 1 + while gcd % (largest_tile_size_available * 2) == 0: + largest_tile_size_available *= 2 + return largest_tile_size_available + +def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]: + """ + Finds h and w such that h*w = hw and h/w = aspect_ratio + We check all possible divisors of hw and return the closest to the aspect ratio + """ + divisors = [i for i in range(2, hw + 1) if hw % i == 0] # all divisors of hw + pairs = [(i, hw // i) for i in divisors] # all pairs of divisors of hw + ratios = [w/h for h, w in pairs] # all ratios of pairs of divisors of hw + closest_ratio = min(ratios, key=lambda x: abs(x - aspect_ratio)) # closest ratio to aspect_ratio + closest_pair = pairs[ratios.index(closest_ratio)] # closest pair of divisors to aspect_ratio + return closest_pair + +@cache +def find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]: + """ + Finds h and w such that h*w = hw and h/w = aspect_ratio + """ + h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) + # find h and w such that h*w = hw and h/w = aspect_ratio + if h * w != hw: + w_candidate = hw / h + # check if w is an integer + if not 
w_candidate.is_integer(): + h_candidate = hw / w + # check if h is an integer + if not h_candidate.is_integer(): + return iterative_closest_divisors(hw, aspect_ratio) + else: + h = int(h_candidate) + else: + w = int(w_candidate) + return h, w + +@contextmanager +def split_attention( + layer: nn.Module, + /, + aspect_ratio: float, # width/height + tile_size: int = 128, # 128 for VAE + swap_size: int = 1, # 1 for VAE + *, + disable: bool = False, + max_depth: Literal[0, 1, 2, 3] = 0, # ! Try 0 or 1 + scale_depth: bool = True, # scale the tile-size depending on the depth + is_sdxl: bool = False, # is the model SD-XL +): + # Hijacks AttnBlock from ldm and Attention from diffusers + + if disable: + logging.info(f"Attention for {layer.__class__.__qualname__} not splitted") + yield + return + + latent_tile_size = max(128, tile_size) // 8 + + def self_attn_forward(forward: Callable, depth: int, layer_name: str, module: nn.Module) -> Callable: + @wraps(forward) + def wrapper(*args, **kwargs): + x = args[0] + + # VAE + if x.ndim == 4: + b, c, h, w = x.shape + + nh = random_divisor(h, latent_tile_size, swap_size) + nw = random_divisor(w, latent_tile_size, swap_size) + + if nh * nw > 1: + x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw) # split into nh * nw tiles + + out = forward(x, *args[1:], **kwargs) + + if nh * nw > 1: + out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw) + + # U-Net + else: + hw: int = x.size(1) + h, w = find_hw_candidates(hw, aspect_ratio) + assert h * w == hw, f"Invalid aspect ratio {aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}" + + factor = 2**depth if scale_depth else 1 + nh = random_divisor(h, latent_tile_size * factor, swap_size) + nw = random_divisor(w, latent_tile_size * factor, swap_size) + + module._split_sizes_hypertile.append((nh, nw)) # type: ignore + + if nh * nw > 1: + x = rearrange(x, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) + + out = forward(x, *args[1:], **kwargs) + + if nh * nw > 1: + out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) + out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + + return out + + return wrapper + + # Handle hijacking the forward method and recovering afterwards + try: + if is_sdxl: + layers = DEPTH_LAYERS_XL + else: + layers = DEPTH_LAYERS + for depth in range(max_depth + 1): + for layer_name, module in layer.named_modules(): + if any(layer_name.endswith(try_name) for try_name in layers[depth]): + # print input shape for debugging + logging.debug(f"HyperTile hijacking attention layer at depth {depth}: {layer_name}") + # hijack + module._original_forward_hypertile = module.forward + module.forward = self_attn_forward(module.forward, depth, layer_name, module) + module._split_sizes_hypertile = [] + yield + finally: + for layer_name, module in layer.named_modules(): + # remove hijack + if hasattr(module, "_original_forward_hypertile"): + if module._split_sizes_hypertile: + logging.debug(f"layer {layer_name} splitted with ({module._split_sizes_hypertile})") + # recover + module.forward = module._original_forward_hypertile + del module._original_forward_hypertile + del module._split_sizes_hypertile + +def hypertile_context_vae(model:nn.Module, aspect_ratio:float, tile_size:int, opts): + """ + Returns context manager for VAE + """ + enabled = opts.hypertile_split_vae_attn + swap_size = opts.hypertile_swap_size_vae + max_depth = opts.hypertile_max_depth_vae + tile_size_max = 
opts.hypertile_max_tile_vae + return split_attention( + model, + aspect_ratio=aspect_ratio, + tile_size=min(tile_size, tile_size_max), + swap_size=swap_size, + disable=not enabled, + max_depth=max_depth, + is_sdxl=False, + ) + +def hypertile_context_unet(model:nn.Module, aspect_ratio:float, tile_size:int, opts, is_sdxl:bool): + """ + Returns context manager for U-Net + """ + enabled = opts.hypertile_split_unet_attn + swap_size = opts.hypertile_swap_size_unet + max_depth = opts.hypertile_max_depth_unet + tile_size_max = opts.hypertile_max_tile_unet + return split_attention( + model, + aspect_ratio=aspect_ratio, + tile_size=min(tile_size, tile_size_max), + swap_size=swap_size, + disable=not enabled, + max_depth=max_depth, + is_sdxl=is_sdxl, + ) -- cgit v1.2.3 From d2e0c1ca132f4f0d98b77397a9f353d4ad8e7c4b Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 26 Nov 2023 10:51:45 +0300 Subject: rework hypertile into a built-in extension --- extensions-builtin/hypertile/hypertile.py | 221 +++++++++------------ .../hypertile/scripts/hypertile_script.py | 73 +++++++ 2 files changed, 172 insertions(+), 122 deletions(-) create mode 100644 extensions-builtin/hypertile/scripts/hypertile_script.py (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/hypertile.py b/extensions-builtin/hypertile/hypertile.py index be898fce..a40c1311 100644 --- a/extensions-builtin/hypertile/hypertile.py +++ b/extensions-builtin/hypertile/hypertile.py @@ -1,10 +1,13 @@ """ Hypertile module for splitting attention layers in SD-1.5 U-Net and SD-1.5 VAE -Warn : The patch works well only if the input image has a width and height that are multiples of 128 -Author : @tfernd Github : https://github.com/tfernd/HyperTile +Warn: The patch works well only if the input image has a width and height that are multiples of 128 +Original author: @tfernd Github: https://github.com/tfernd/HyperTile """ from __future__ import annotations + +import functools +from dataclasses import dataclass from typing import Callable from typing_extensions import Literal @@ -18,6 +21,19 @@ import random from einops import rearrange + +@dataclass +class HypertileParams: + depth = 0 + layer_name = "" + tile_size: int = 0 + swap_size: int = 0 + aspect_ratio: float = 1.0 + forward = None + enabled = False + + + # TODO add SD-XL layers DEPTH_LAYERS = { 0: [ @@ -176,6 +192,7 @@ DEPTH_LAYERS_XL = { RNG_INSTANCE = random.Random() + def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: """ Returns a random divisor of value that @@ -193,10 +210,13 @@ def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: return ns[idx] + def set_hypertile_seed(seed: int) -> None: RNG_INSTANCE.seed(seed) -def largest_tile_size_available(width:int, height:int) -> int: + +@functools.cache +def largest_tile_size_available(width: int, height: int) -> int: """ Calculates the largest tile size available for a given width and height Tile size is always a power of 2 @@ -207,6 +227,7 @@ def largest_tile_size_available(width:int, height:int) -> int: largest_tile_size_available *= 2 return largest_tile_size_available + def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]: """ Finds h and w such that h*w = hw and h/w = aspect_ratio @@ -219,6 +240,7 @@ def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]: closest_pair = pairs[ratios.index(closest_ratio)] # closest pair of divisors to aspect_ratio return closest_pair + @cache def 
find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]: """ @@ -240,132 +262,87 @@ def find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]: w = int(w_candidate) return h, w -@contextmanager -def split_attention( - layer: nn.Module, - /, - aspect_ratio: float, # width/height - tile_size: int = 128, # 128 for VAE - swap_size: int = 1, # 1 for VAE - *, - disable: bool = False, - max_depth: Literal[0, 1, 2, 3] = 0, # ! Try 0 or 1 - scale_depth: bool = True, # scale the tile-size depending on the depth - is_sdxl: bool = False, # is the model SD-XL -): - # Hijacks AttnBlock from ldm and Attention from diffusers - - if disable: - logging.info(f"Attention for {layer.__class__.__qualname__} not splitted") - yield - return - - latent_tile_size = max(128, tile_size) // 8 - - def self_attn_forward(forward: Callable, depth: int, layer_name: str, module: nn.Module) -> Callable: - @wraps(forward) - def wrapper(*args, **kwargs): - x = args[0] - - # VAE - if x.ndim == 4: - b, c, h, w = x.shape - - nh = random_divisor(h, latent_tile_size, swap_size) - nw = random_divisor(w, latent_tile_size, swap_size) - - if nh * nw > 1: - x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw) # split into nh * nw tiles - - out = forward(x, *args[1:], **kwargs) - - if nh * nw > 1: - out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw) - - # U-Net - else: - hw: int = x.size(1) - h, w = find_hw_candidates(hw, aspect_ratio) - assert h * w == hw, f"Invalid aspect ratio {aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}" - factor = 2**depth if scale_depth else 1 - nh = random_divisor(h, latent_tile_size * factor, swap_size) - nw = random_divisor(w, latent_tile_size * factor, swap_size) +def self_attn_forward(params: HypertileParams, scale_depth=True) -> Callable: + + @wraps(params.forward) + def wrapper(*args, **kwargs): + if not params.enabled: + return params.forward(*args, **kwargs) - module._split_sizes_hypertile.append((nh, nw)) # type: ignore + latent_tile_size = max(128, params.tile_size) // 8 + x = args[0] - if nh * nw > 1: - x = rearrange(x, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) + # VAE + if x.ndim == 4: + b, c, h, w = x.shape - out = forward(x, *args[1:], **kwargs) + nh = random_divisor(h, latent_tile_size, params.swap_size) + nw = random_divisor(w, latent_tile_size, params.swap_size) - if nh * nw > 1: - out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) - out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + if nh * nw > 1: + x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw) # split into nh * nw tiles - return out + out = params.forward(x, *args[1:], **kwargs) - return wrapper + if nh * nw > 1: + out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw) - # Handle hijacking the forward method and recovering afterwards - try: - if is_sdxl: - layers = DEPTH_LAYERS_XL + # U-Net else: - layers = DEPTH_LAYERS - for depth in range(max_depth + 1): - for layer_name, module in layer.named_modules(): + hw: int = x.size(1) + h, w = find_hw_candidates(hw, params.aspect_ratio) + assert h * w == hw, f"Invalid aspect ratio {params.aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}" + + factor = 2 ** params.depth if scale_depth else 1 + nh = random_divisor(h, latent_tile_size * factor, params.swap_size) + nw = random_divisor(w, latent_tile_size * factor, params.swap_size) + + if nh * nw > 1: + x = rearrange(x, "b (nh h 
nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) + + out = params.forward(x, *args[1:], **kwargs) + + if nh * nw > 1: + out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) + out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + + return out + + return wrapper + + +def hypertile_hook_model(model: nn.Module, width, height, *, enable=False, tile_size_max=128, swap_size=1, max_depth=3, is_sdxl=False): + hypertile_layers = getattr(model, "__webui_hypertile_layers", None) + if hypertile_layers is None: + if not enable: + return + + hypertile_layers = {} + layers = DEPTH_LAYERS_XL if is_sdxl else DEPTH_LAYERS + + for depth in range(4): + for layer_name, module in model.named_modules(): if any(layer_name.endswith(try_name) for try_name in layers[depth]): - # print input shape for debugging - logging.debug(f"HyperTile hijacking attention layer at depth {depth}: {layer_name}") - # hijack - module._original_forward_hypertile = module.forward - module.forward = self_attn_forward(module.forward, depth, layer_name, module) - module._split_sizes_hypertile = [] - yield - finally: - for layer_name, module in layer.named_modules(): - # remove hijack - if hasattr(module, "_original_forward_hypertile"): - if module._split_sizes_hypertile: - logging.debug(f"layer {layer_name} splitted with ({module._split_sizes_hypertile})") - # recover - module.forward = module._original_forward_hypertile - del module._original_forward_hypertile - del module._split_sizes_hypertile - -def hypertile_context_vae(model:nn.Module, aspect_ratio:float, tile_size:int, opts): - """ - Returns context manager for VAE - """ - enabled = opts.hypertile_split_vae_attn - swap_size = opts.hypertile_swap_size_vae - max_depth = opts.hypertile_max_depth_vae - tile_size_max = opts.hypertile_max_tile_vae - return split_attention( - model, - aspect_ratio=aspect_ratio, - tile_size=min(tile_size, tile_size_max), - swap_size=swap_size, - disable=not enabled, - max_depth=max_depth, - is_sdxl=False, - ) - -def hypertile_context_unet(model:nn.Module, aspect_ratio:float, tile_size:int, opts, is_sdxl:bool): - """ - Returns context manager for U-Net - """ - enabled = opts.hypertile_split_unet_attn - swap_size = opts.hypertile_swap_size_unet - max_depth = opts.hypertile_max_depth_unet - tile_size_max = opts.hypertile_max_tile_unet - return split_attention( - model, - aspect_ratio=aspect_ratio, - tile_size=min(tile_size, tile_size_max), - swap_size=swap_size, - disable=not enabled, - max_depth=max_depth, - is_sdxl=is_sdxl, - ) + params = HypertileParams() + module.__webui_hypertile_params = params + params.forward = module.forward + params.depth = depth + params.layer_name = layer_name + module.forward = self_attn_forward(params) + + hypertile_layers[layer_name] = 1 + + model.__webui_hypertile_layers = hypertile_layers + + aspect_ratio = width / height + tile_size = min(largest_tile_size_available(width, height), tile_size_max) + + for layer_name, module in model.named_modules(): + if layer_name in hypertile_layers: + params = module.__webui_hypertile_params + + params.tile_size = tile_size + params.swap_size = swap_size + params.aspect_ratio = aspect_ratio + params.enabled = enable and params.depth <= max_depth diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py new file mode 100644 index 00000000..3cc29cd1 --- /dev/null +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -0,0 +1,73 @@ +import hypertile 
+from modules import scripts, script_callbacks, shared + + +class ScriptHypertile(scripts.Script): + name = "Hypertile" + + def title(self): + return self.name + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def process(self, p, *args): + hypertile.set_hypertile_seed(p.all_seeds[0]) + + configure_hypertile(p.width, p.height, enable_unet=shared.opts.hypertile_enable_unet) + + def before_hr(self, p, *args): + configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet) + + +def configure_hypertile(width, height, enable_unet=True): + hypertile.hypertile_hook_model( + shared.sd_model.first_stage_model, + width, + height, + swap_size=shared.opts.hypertile_swap_size_vae, + max_depth=shared.opts.hypertile_max_depth_vae, + tile_size_max=shared.opts.hypertile_max_tile_vae, + enable=shared.opts.hypertile_enable_vae, + ) + + hypertile.hypertile_hook_model( + shared.sd_model.model, + width, + height, + swap_size=shared.opts.hypertile_swap_size_unet, + max_depth=shared.opts.hypertile_max_depth_unet, + tile_size_max=shared.opts.hypertile_max_tile_unet, + enable=enable_unet, + is_sdxl=shared.sd_model.is_sdxl + ) + + +def on_ui_settings(): + import gradio as gr + + options = { + "hypertile_explanation": shared.OptionHTML(""" + Hypertile optimizes the self-attention layer within U-Net and VAE models, + resulting in a reduction in computation time ranging from 1 to 4 times. The larger the generated image is, the greater the + benefit. + """), + + "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net").info("noticeable change in details of the generated picture; if enabled, overrides the setting below"), + "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass"), + "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), + "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), + "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}), + + "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE").info("minimal change in the generated picture"), + "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), + "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), + "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}), + } + + for name, opt in options.items(): + opt.section = ('hypertile', "Hypertile") + shared.opts.add_option(name, opt) + + +script_callbacks.on_ui_settings(on_ui_settings) -- cgit v1.2.3 From d1750e5eca6fd95db3516928cad18b32e557f56f Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 26 Nov 2023 11:37:12 +0300 Subject: fix linter errors --- extensions-builtin/hypertile/hypertile.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/hypertile.py b/extensions-builtin/hypertile/hypertile.py index a40c1311..feb02fd2 100644 --- a/extensions-builtin/hypertile/hypertile.py +++ b/extensions-builtin/hypertile/hypertile.py @@ -9,11 +9,8 @@ from __future__ import 
annotations import functools from dataclasses import dataclass from typing import Callable -from typing_extensions import Literal -import logging from functools import wraps, cache -from contextlib import contextmanager import math import torch.nn as nn -- cgit v1.2.3 From 23c36f59b4a423362d74f1ca2cc69871ae101e0e Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:10:26 +0900 Subject: Support XYZ scripts / split hires path from unet --- .../hypertile/scripts/hypertile_script.py | 11 +++-- .../hypertile/scripts/hypertile_xyz.py | 52 ++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 extensions-builtin/hypertile/scripts/hypertile_xyz.py (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py index 3cc29cd1..b2413cc5 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_script.py +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -1,5 +1,6 @@ import hypertile from modules import scripts, script_callbacks, shared +from scripts.hypertile_xyz import add_axis_options class ScriptHypertile(scripts.Script): @@ -17,7 +18,10 @@ class ScriptHypertile(scripts.Script): configure_hypertile(p.width, p.height, enable_unet=shared.opts.hypertile_enable_unet) def before_hr(self, p, *args): - configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet) + # exclusive hypertile seed for the second pass + if not shared.opts.hypertile_enable_unet: + hypertile.set_hypertile_seed(p.all_seeds[0]) + configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=shared.opts.hypertile_enable_unet_secondpass) def configure_hypertile(width, height, enable_unet=True): @@ -57,12 +61,12 @@ def on_ui_settings(): "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass"), "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), - "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}), + "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}), "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE").info("minimal change in the generated picture"), "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), - "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}), + "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}), } for name, opt in options.items(): @@ -71,3 +75,4 @@ def on_ui_settings(): script_callbacks.on_ui_settings(on_ui_settings) +script_callbacks.on_before_ui(add_axis_options) \ No newline at end of file diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py 
new file mode 100644 index 00000000..eaf7c8d7 --- /dev/null +++ b/extensions-builtin/hypertile/scripts/hypertile_xyz.py @@ -0,0 +1,52 @@ +from modules import scripts +xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module +from modules.shared import opts + +def int_applier(value_name:str, min_range:int = -1, max_range:int = -1): + """ + Returns a function that applies the given value to the given value_name in opts.data. + """ + # convert to int + def validate(value_name:str, value:str): + try: + value = int(value) + except: + raise ValueError(f"Value {value} for {value_name} is not an integer") + # validate value + if not min_range == -1: + assert value >= min_range, f"Value {value} for {value_name} must be greater than or equal to {min_range}" + if not max_range == -1: + assert value <= max_range, f"Value {value} for {value_name} must be less than or equal to {max_range}" + def apply_int(p, x, xs): + validate(value_name, x) + opts.data[value_name] = int(x) + return apply_int + +def bool_applier(value_name:str): + """ + Returns a function that applies the given value to the given value_name in opts.data. + """ + def validate(value_name:str, value:str): + assert value.lower() in ["true", "false"], f"Value {value} for {value_name} must be either true or false" + def apply_bool(p, x, xs): + validate(value_name, x) + value_boolean = x.lower() == "true" + opts.data[value_name] = value_boolean + return apply_bool + +def add_axis_options(): + extra_axis_options = [ + xyz_grid.AxisOption("[Hypertile] Unet First pass Enabled", str, bool_applier("hypertile_enable_unet"), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] Unet Second pass Enabled", str, bool_applier("hypertile_enable_unet_secondpass"), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] Unet Max Depth", int, int_applier("hypertile_max_depth_unet", 0, 3), choices=lambda: [str(x) for x in range(4)]), + xyz_grid.AxisOption("[Hypertile] Unet Max Tile Size", int, int_applier("hypertile_max_tile_unet", 0, 512)), + xyz_grid.AxisOption("[Hypertile] Unet Swap Size", int, int_applier("hypertile_swap_size_unet", 0, 64)), + xyz_grid.AxisOption("[Hypertile] VAE Enabled", str, bool_applier("hypertile_enable_vae"), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] VAE Max Depth", int, int_applier("hypertile_max_depth_vae", 0, 3), choices=lambda: [str(x) for x in range(4)]), + xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)), + xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)), + ] + # check if the axis options have already been added + if any(set(opt.label for opt in extra_axis_options).intersection(set(opt.label for opt in xyz_grid.axis_options))): + return + xyz_grid.axis_options.extend(extra_axis_options) \ No newline at end of file -- cgit v1.2.3 From 601a7b4ce5b28efd29b1668c7b8b74fb6b62f6f3 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:10:31 +0900 Subject: cache divisors / fix ruff --- extensions-builtin/hypertile/hypertile.py | 24 ++++++++++++++-------- .../hypertile/scripts/hypertile_script.py | 2 +- .../hypertile/scripts/hypertile_xyz.py | 18 ++++++++-------- 3 files changed, 26 insertions(+), 18 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/hypertile.py 
b/extensions-builtin/hypertile/hypertile.py index feb02fd2..0f40e2d3 100644 --- a/extensions-builtin/hypertile/hypertile.py +++ b/extensions-builtin/hypertile/hypertile.py @@ -6,7 +6,6 @@ Original author: @tfernd Github: https://github.com/tfernd/HyperTile from __future__ import annotations -import functools from dataclasses import dataclass from typing import Callable @@ -189,20 +188,27 @@ DEPTH_LAYERS_XL = { RNG_INSTANCE = random.Random() - -def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: +@cache +def get_divisors(value: int, min_value: int, /, max_options: int = 1) -> list[int]: """ - Returns a random divisor of value that + Returns divisors of value that x * min_value <= value - if max_options is 1, the behavior is deterministic + in big -> small order, amount of divisors is limited by max_options """ + max_options = max(1, max_options) # at least 1 option should be returned min_value = min(min_value, value) - - # All big divisors of value (inclusive) divisors = [i for i in range(min_value, value + 1) if value % i == 0] # divisors in small -> big order - ns = [value // i for i in divisors[:max_options]] # has at least 1 element # big -> small order + return ns + +def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: + """ + Returns a random divisor of value that + x * min_value <= value + if max_options is 1, the behavior is deterministic + """ + ns = get_divisors(value, min_value, max_options=max_options) # get cached divisors idx = RNG_INSTANCE.randint(0, len(ns) - 1) return ns[idx] @@ -212,7 +218,7 @@ def set_hypertile_seed(seed: int) -> None: RNG_INSTANCE.seed(seed) -@functools.cache +@cache def largest_tile_size_available(width: int, height: int) -> int: """ Calculates the largest tile size available for a given width and height diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py index b2413cc5..d3ab6091 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_script.py +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -75,4 +75,4 @@ def on_ui_settings(): script_callbacks.on_ui_settings(on_ui_settings) -script_callbacks.on_before_ui(add_axis_options) \ No newline at end of file +script_callbacks.on_before_ui(add_axis_options) diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py index eaf7c8d7..3007a083 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_xyz.py +++ b/extensions-builtin/hypertile/scripts/hypertile_xyz.py @@ -1,17 +1,17 @@ from modules import scripts -xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module from modules.shared import opts +xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module + def int_applier(value_name:str, min_range:int = -1, max_range:int = -1): """ Returns a function that applies the given value to the given value_name in opts.data. 
""" # convert to int def validate(value_name:str, value:str): - try: - value = int(value) - except: - raise ValueError(f"Value {value} for {value_name} is not an integer") + if not value.isnumeric(): + raise ValueError(f"Value {value} for {value_name} must be an integer") + value = int(value) # validate value if not min_range == -1: assert value >= min_range, f"Value {value} for {value_name} must be greater than or equal to {min_range}" @@ -46,7 +46,9 @@ def add_axis_options(): xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)), xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)), ] - # check if the axis options have already been added - if any(set(opt.label for opt in extra_axis_options).intersection(set(opt.label for opt in xyz_grid.axis_options))): + set_a = set([opt.label for opt in xyz_grid.axis_options]) + set_b = set([opt.label for opt in extra_axis_options]) + if set_a.intersection(set_b): return - xyz_grid.axis_options.extend(extra_axis_options) \ No newline at end of file + + xyz_grid.axis_options.extend(extra_axis_options) -- cgit v1.2.3 From f207eb7a0d8b4443dbe665df99c31f8ff91660fd Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:11:28 +0900 Subject: fix ruff in hypertile_xyz.py --- extensions-builtin/hypertile/scripts/hypertile_xyz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py index 3007a083..4055a9ea 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_xyz.py +++ b/extensions-builtin/hypertile/scripts/hypertile_xyz.py @@ -46,8 +46,8 @@ def add_axis_options(): xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)), xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)), ] - set_a = set([opt.label for opt in xyz_grid.axis_options]) - set_b = set([opt.label for opt in extra_axis_options]) + set_a = set(opt.label for opt in xyz_grid.axis_options) + set_b = set(opt.label for opt in extra_axis_options) if set_a.intersection(set_b): return -- cgit v1.2.3 From 524d6a4dbae68bf557d9c5fe686707d96841e0b5 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:13:18 +0900 Subject: fix ruff - set comprehension --- extensions-builtin/hypertile/scripts/hypertile_xyz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py index 4055a9ea..928e9965 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_xyz.py +++ b/extensions-builtin/hypertile/scripts/hypertile_xyz.py @@ -46,8 +46,8 @@ def add_axis_options(): xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)), xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)), ] - set_a = set(opt.label for opt in xyz_grid.axis_options) - set_b = set(opt.label for opt in extra_axis_options) + set_a = {opt.label for opt in xyz_grid.axis_options} + set_b = {opt.label for opt in extra_axis_options} if set_a.intersection(set_b): return -- cgit v1.2.3 From ec78354efa179b64e92d6b98d781f6572b4eb084 Mon Sep 17 00:00:00 2001 
From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:25:28 +0900 Subject: hypertile_xyz: we don't need isnumeric check for AxisOption --- extensions-builtin/hypertile/scripts/hypertile_xyz.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py index 928e9965..9e96ae3c 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_xyz.py +++ b/extensions-builtin/hypertile/scripts/hypertile_xyz.py @@ -7,10 +7,7 @@ def int_applier(value_name:str, min_range:int = -1, max_range:int = -1): """ Returns a function that applies the given value to the given value_name in opts.data. """ - # convert to int def validate(value_name:str, value:str): - if not value.isnumeric(): - raise ValueError(f"Value {value} for {value_name} must be an integer") value = int(value) # validate value if not min_range == -1: -- cgit v1.2.3 From e294e46d46a814457fc77af13c17128bd6075d45 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 2 Dec 2023 09:26:38 +0300 Subject: split UI settings page into many --- .../extra-options-section/scripts/extra_options_section.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/extra-options-section/scripts/extra_options_section.py b/extensions-builtin/extra-options-section/scripts/extra_options_section.py index 983f87ff..a903df62 100644 --- a/extensions-builtin/extra-options-section/scripts/extra_options_section.py +++ b/extensions-builtin/extra-options-section/scripts/extra_options_section.py @@ -64,11 +64,14 @@ class ExtraOptionsSection(scripts.Script): p.override_settings[name] = value -shared.options_templates.update(shared.options_section(('ui', "User interface"), { - "extra_options_txt2img": shared.OptionInfo([], "Options in main UI - txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(), - "extra_options_img2img": shared.OptionInfo([], "Options in main UI - img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(), - "extra_options_cols": shared.OptionInfo(1, "Options in main UI - number of columns", gr.Number, {"precision": 0}).needs_reload_ui(), - "extra_options_accordion": shared.OptionInfo(False, "Options in main UI - place into an accordion").needs_reload_ui() +shared.options_templates.update(shared.options_section(('settings_in_ui', "Settings in UI", "ui"), { + "settings_in_ui": shared.OptionHTML(""" +This page allows you to add some settings to the main interface of txt2img and img2img tabs. 
+"""), + "extra_options_txt2img": shared.OptionInfo([], "Settings for txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(), + "extra_options_img2img": shared.OptionInfo([], "Settings for img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(), + "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Number, {"precision": 0}).needs_reload_ui(), + "extra_options_accordion": shared.OptionInfo(False, "Place added settings into an accordion").needs_reload_ui() })) -- cgit v1.2.3 From 22e23dbf29b0bbc807daa57318c31145f8dd0774 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 4 Dec 2023 15:56:03 +0300 Subject: add hypertile infotext --- .../hypertile/scripts/hypertile_script.py | 53 +++++++++++++++++----- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py index d3ab6091..395d584b 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_script.py +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -17,11 +17,42 @@ class ScriptHypertile(scripts.Script): configure_hypertile(p.width, p.height, enable_unet=shared.opts.hypertile_enable_unet) + self.add_infotext(p) + def before_hr(self, p, *args): + + enable = shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet + # exclusive hypertile seed for the second pass - if not shared.opts.hypertile_enable_unet: + if enable: hypertile.set_hypertile_seed(p.all_seeds[0]) - configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=shared.opts.hypertile_enable_unet_secondpass) + + configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=enable) + + if enable and not shared.opts.hypertile_enable_unet: + p.extra_generation_params["Hypertile U-Net second pass"] = True + + self.add_infotext(p, add_unet_params=True) + + def add_infotext(self, p, add_unet_params=False): + def option(name): + value = getattr(shared.opts, name) + default_value = shared.opts.get_default(name) + return None if value == default_value else value + + if shared.opts.hypertile_enable_unet: + p.extra_generation_params["Hypertile U-Net"] = True + + if shared.opts.hypertile_enable_unet or add_unet_params: + p.extra_generation_params["Hypertile U-Net max depth"] = option('hypertile_max_depth_unet') + p.extra_generation_params["Hypertile U-Net max tile size"] = option('hypertile_max_tile_unet') + p.extra_generation_params["Hypertile U-Net swap size"] = option('hypertile_swap_size_unet') + + if shared.opts.hypertile_enable_vae: + p.extra_generation_params["Hypertile VAE"] = True + p.extra_generation_params["Hypertile VAE max depth"] = option('hypertile_max_depth_vae') + p.extra_generation_params["Hypertile VAE max tile size"] = option('hypertile_max_tile_vae') + p.extra_generation_params["Hypertile VAE swap size"] = option('hypertile_swap_size_vae') def configure_hypertile(width, height, enable_unet=True): @@ -57,16 +88,16 @@ def on_ui_settings(): benefit. 
"""), - "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net").info("noticeable change in details of the generated picture; if enabled, overrides the setting below"), - "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass"), - "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), - "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), - "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}), + "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net", infotext="Hypertile U-Net").info("enables hypertile for all modes, including hires fix second pass; noticeable change in details of the generated picture"), + "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass", infotext="Hypertile U-Net second pass").info("enables hypertile just for hires fix second pass - regardless of whether the above setting is enabled"), + "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile U-Net max depth").info("larger = more neural network layers affected; minor effect on performance"), + "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-Net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile U-Net max tile size").info("larger = worse performance"), + "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-Net swap size", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, infotext="Hypertile U-Net swap size"), - "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE").info("minimal change in the generated picture"), - "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), - "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), - "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}), + "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE", infotext="Hypertile VAE").info("minimal change in the generated picture"), + "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile VAE max depth"), + "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile VAE max tile size"), + "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, infotext="Hypertile VAE swap size"), } for name, opt in options.items(): -- cgit v1.2.3 From 16bdcce92d5b482d50cdc32a8f308040d320b6c9 Mon Sep 17 00:00:00 2001 From: Rene Kroon Date: Fri, 8 Dec 2023 21:19:29 +0100 Subject: #13354: solve lora loading issue --- extensions-builtin/Lora/networks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 7f814706..629bf853 100644 --- a/extensions-builtin/Lora/networks.py +++ 
b/extensions-builtin/Lora/networks.py @@ -159,7 +159,8 @@ def load_network(name, network_on_disk): bundle_embeddings = {} for key_network, weight in sd.items(): - key_network_without_network_parts, network_part = key_network.split(".", 1) + key_network_without_network_parts, _, network_part = key_network.partition(".") + if key_network_without_network_parts == "bundle_emb": emb_name, vec_name = network_part.split(".", 1) emb_dict = bundle_embeddings.get(emb_name, {}) -- cgit v1.2.3 From 1a79a5049bdfef285235e83f37b201e39dd54f81 Mon Sep 17 00:00:00 2001 From: kaalibro Date: Sat, 9 Dec 2023 22:35:31 +0600 Subject: Assign id for "extra_options". Replace numeric field with slider in Settings. --- .../extra-options-section/scripts/extra_options_section.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/extra-options-section/scripts/extra_options_section.py b/extensions-builtin/extra-options-section/scripts/extra_options_section.py index a903df62..b9867fe6 100644 --- a/extensions-builtin/extra-options-section/scripts/extra_options_section.py +++ b/extensions-builtin/extra-options-section/scripts/extra_options_section.py @@ -23,11 +23,12 @@ class ExtraOptionsSection(scripts.Script): self.setting_names = [] self.infotext_fields = [] extra_options = shared.opts.extra_options_img2img if is_img2img else shared.opts.extra_options_txt2img + elem_id_tabname = "extra_options_" + ("img2img" if is_img2img else "txt2img") mapping = {k: v for v, k in generation_parameters_copypaste.infotext_to_setting_name_mapping} with gr.Blocks() as interface: - with gr.Accordion("Options", open=False) if shared.opts.extra_options_accordion and extra_options else gr.Group(): + with gr.Accordion("Options", open=False, elem_id=elem_id_tabname) if shared.opts.extra_options_accordion and extra_options else gr.Group(elem_id=elem_id_tabname): row_count = math.ceil(len(extra_options) / shared.opts.extra_options_cols) @@ -70,7 +71,7 @@ This page allows you to add some settings to the main interface of txt2img and i """), "extra_options_txt2img": shared.OptionInfo([], "Settings for txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(), "extra_options_img2img": shared.OptionInfo([], "Settings for img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(), - "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Number, {"precision": 0}).needs_reload_ui(), + "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Slider, {"step": 1, "minimum": 1, "maximum": 6}).needs_reload_ui(), "extra_options_accordion": shared.OptionInfo(False, "Place added settings into an accordion").needs_reload_ui() })) -- cgit v1.2.3 From 6b8143a84e112f029ee1868b6ab98b1d2c773ead Mon Sep 17 00:00:00 2001 From: kaalibro Date: Sun, 10 Dec 2023 15:35:06 +0600 Subject: Number of columns slider: max count set to 20, add description info --- .../extra-options-section/scripts/extra_options_section.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/extra-options-section/scripts/extra_options_section.py 
b/extensions-builtin/extra-options-section/scripts/extra_options_section.py index b9867fe6..ac2c3de4 100644 --- a/extensions-builtin/extra-options-section/scripts/extra_options_section.py +++ b/extensions-builtin/extra-options-section/scripts/extra_options_section.py @@ -71,7 +71,7 @@ This page allows you to add some settings to the main interface of txt2img and i """), "extra_options_txt2img": shared.OptionInfo([], "Settings for txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(), "extra_options_img2img": shared.OptionInfo([], "Settings for img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(), - "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Slider, {"step": 1, "minimum": 1, "maximum": 6}).needs_reload_ui(), + "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Slider, {"step": 1, "minimum": 1, "maximum": 20}).info("displayed amount will depend on the actual browser window width").needs_reload_ui(), "extra_options_accordion": shared.OptionInfo(False, "Place added settings into an accordion").needs_reload_ui() })) -- cgit v1.2.3 From 735c9e8059384d4f640e5582413c30871f83eac5 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:38:32 +0800 Subject: Fix network_oft --- extensions-builtin/Lora/network_oft.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 05c37811..44465f7a 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -53,12 +53,17 @@ class NetworkModuleOFT(network.NetworkModule): self.constraint = None self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) - def calc_updown_kb(self, orig_weight, multiplier): + def calc_updown(self, orig_weight): + I = torch.eye(self.block_size, device=self.oft_blocks.device) oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix + if self.is_kohya: + block_Q = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix + norm_Q = torch.norm(block_Q.flatten()) + new_norm_Q = torch.clamp(norm_Q, max=self.constraint) + block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) + oft_blocks = torch.matmul(I + block_Q, (I - block_Q).float().inverse()) R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) - R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device) # This errors out for MultiheadAttention, might need to be handled up-stream merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size) @@ -70,15 +75,10 @@ class NetworkModuleOFT(network.NetworkModule): merged_weight = rearrange(merged_weight, 'k m ... 
-> (k m) ...') updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight + print(torch.norm(updown)) output_shape = orig_weight.shape return self.finalize_updown(updown, orig_weight, output_shape) - def calc_updown(self, orig_weight): - # if alpha is a very small number as in coft, calc_scale() will return a almost zero number so we ignore it - multiplier = self.multiplier() - return self.calc_updown_kb(orig_weight, multiplier) - - # override to remove the multiplier/scale factor; it's already multiplied in get_weight def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): if self.bias is not None: updown = updown.reshape(self.bias.shape) @@ -94,4 +94,5 @@ class NetworkModuleOFT(network.NetworkModule): if ex_bias is not None: ex_bias = ex_bias * self.multiplier() - return updown, ex_bias + # Ignore calc_scale, which is not used in OFT. + return updown * self.multiplier(), ex_bias -- cgit v1.2.3 From 265bc26c21264d63956e8f30f1ce31dec917fc76 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:43:24 +0800 Subject: Use self.scale instead of custom finalize --- extensions-builtin/Lora/network_oft.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index 44465f7a..e3ae61a2 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -21,6 +21,8 @@ class NetworkModuleOFT(network.NetworkModule): self.lin_module = None self.org_module: list[torch.Module] = [self.sd_module] + self.scale = 1.0 + # kohya-ss if "oft_blocks" in weights.w.keys(): self.is_kohya = True @@ -78,21 +80,3 @@ class NetworkModuleOFT(network.NetworkModule): print(torch.norm(updown)) output_shape = orig_weight.shape return self.finalize_updown(updown, orig_weight, output_shape) - - def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): - if self.bias is not None: - updown = updown.reshape(self.bias.shape) - updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype) - updown = updown.reshape(output_shape) - - if len(output_shape) == 4: - updown = updown.reshape(output_shape) - - if orig_weight.size().numel() == updown.size().numel(): - updown = updown.reshape(orig_weight.shape) - - if ex_bias is not None: - ex_bias = ex_bias * self.multiplier() - - # Ignore calc_scale, which is not used in OFT. - return updown * self.multiplier(), ex_bias -- cgit v1.2.3 From 8fc67f3851babd4575d3312b931d5e7c2b0c78c6 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:44:49 +0800 Subject: remove debug print --- extensions-builtin/Lora/network_oft.py | 1 - 1 file changed, 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index e3ae61a2..ff4eb59b 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -77,6 +77,5 @@ class NetworkModuleOFT(network.NetworkModule): merged_weight = rearrange(merged_weight, 'k m ... 
-> (k m) ...') updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight - print(torch.norm(updown)) output_shape = orig_weight.shape return self.finalize_updown(updown, orig_weight, output_shape) -- cgit v1.2.3 From 3772a82a70769fe1aac884a75bf5a3313fb83328 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:47:13 +0800 Subject: better naming and correct order for device. --- extensions-builtin/Lora/network_oft.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py index ff4eb59b..fa647020 100644 --- a/extensions-builtin/Lora/network_oft.py +++ b/extensions-builtin/Lora/network_oft.py @@ -56,14 +56,15 @@ class NetworkModuleOFT(network.NetworkModule): self.block_size, self.num_blocks = factorization(self.out_dim, self.dim) def calc_updown(self, orig_weight): - I = torch.eye(self.block_size, device=self.oft_blocks.device) oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) + eye = torch.eye(self.block_size, device=self.oft_blocks.device) + if self.is_kohya: block_Q = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix norm_Q = torch.norm(block_Q.flatten()) new_norm_Q = torch.clamp(norm_Q, max=self.constraint) block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8)) - oft_blocks = torch.matmul(I + block_Q, (I - block_Q).float().inverse()) + oft_blocks = torch.matmul(eye + block_Q, (eye - block_Q).float().inverse()) R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype) -- cgit v1.2.3 From 93eae69895c34361a71dbed17348bcfd132fbc6a Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 16 Dec 2023 11:00:42 +0300 Subject: move soft inpainting to a built-in extension --- .../soft-inpainting/scripts/soft_inpainting.py | 747 +++++++++++++++++++++ 1 file changed, 747 insertions(+) create mode 100644 extensions-builtin/soft-inpainting/scripts/soft_inpainting.py (limited to 'extensions-builtin') diff --git a/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py new file mode 100644 index 00000000..d9024344 --- /dev/null +++ b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py @@ -0,0 +1,747 @@ +import numpy as np +import gradio as gr +import math +from modules.ui_components import InputAccordion +import modules.scripts as scripts + + +class SoftInpaintingSettings: + def __init__(self, + mask_blend_power, + mask_blend_scale, + inpaint_detail_preservation, + composite_mask_influence, + composite_difference_threshold, + composite_difference_contrast): + self.mask_blend_power = mask_blend_power + self.mask_blend_scale = mask_blend_scale + self.inpaint_detail_preservation = inpaint_detail_preservation + self.composite_mask_influence = composite_mask_influence + self.composite_difference_threshold = composite_difference_threshold + self.composite_difference_contrast = composite_difference_contrast + + def add_generation_params(self, dest): + dest[enabled_gen_param_label] = True + dest[gen_param_labels.mask_blend_power] = self.mask_blend_power + dest[gen_param_labels.mask_blend_scale] = self.mask_blend_scale + dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation + dest[gen_param_labels.composite_mask_influence] = self.composite_mask_influence + 
dest[gen_param_labels.composite_difference_threshold] = self.composite_difference_threshold + dest[gen_param_labels.composite_difference_contrast] = self.composite_difference_contrast + + +# ------------------- Methods ------------------- + +def processing_uses_inpainting(p): + # TODO: Figure out a better way to determine if inpainting is being used by p + if getattr(p, "image_mask", None) is not None: + return True + + if getattr(p, "mask", None) is not None: + return True + + if getattr(p, "nmask", None) is not None: + return True + + return False + + +def latent_blend(settings, a, b, t): + """ + Interpolates two latent image representations according to the parameter t, + where the interpolated vectors' magnitudes are also interpolated separately. + The "detail_preservation" factor biases the magnitude interpolation towards + the larger of the two magnitudes. + """ + import torch + + # NOTE: We use inplace operations wherever possible. + + # [4][w][h] to [1][4][w][h] + t2 = t.unsqueeze(0) + # [4][w][h] to [1][1][w][h] - the [4] seem redundant. + t3 = t[0].unsqueeze(0).unsqueeze(0) + + one_minus_t2 = 1 - t2 + one_minus_t3 = 1 - t3 + + # Linearly interpolate the image vectors. + a_scaled = a * one_minus_t2 + b_scaled = b * t2 + image_interp = a_scaled + image_interp.add_(b_scaled) + result_type = image_interp.dtype + del a_scaled, b_scaled, t2, one_minus_t2 + + # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) + # 64-bit operations are used here to allow large exponents. + current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001) + + # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). + a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_( + settings.inpaint_detail_preservation) * one_minus_t3 + b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_( + settings.inpaint_detail_preservation) * t3 + desired_magnitude = a_magnitude + desired_magnitude.add_(b_magnitude).pow_(1 / settings.inpaint_detail_preservation) + del a_magnitude, b_magnitude, t3, one_minus_t3 + + # Change the linearly interpolated image vectors' magnitudes to the value we want. + # This is the last 64-bit operation. + image_interp_scaling_factor = desired_magnitude + image_interp_scaling_factor.div_(current_magnitude) + image_interp_scaling_factor = image_interp_scaling_factor.to(result_type) + image_interp_scaled = image_interp + image_interp_scaled.mul_(image_interp_scaling_factor) + del current_magnitude + del desired_magnitude + del image_interp + del image_interp_scaling_factor + del result_type + + return image_interp_scaled + + +def get_modified_nmask(settings, nmask, sigma): + """ + Converts a negative mask representing the transparency of the original latent vectors being overlayed + to a mask that is scaled according to the denoising strength for this step. + + Where: + 0 = fully opaque, infinite density, fully masked + 1 = fully transparent, zero density, fully unmasked + + We bring this transparency to a power, as this allows one to simulate N number of blending operations + where N can be any positive real value. Using this one can control the balance of influence between + the denoiser and the original latents according to the sigma value. 
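As a quick illustration with assumed values mask_blend_power = 1 and mask_blend_scale = 0.5: a value nmask = 0.8 is raised to the power (2 ** 1) * 0.5 = 1.0 at sigma = 2 (staying 0.8), but to the power 0.25 at sigma = 0.5 (becoming about 0.95), so the returned value drifts toward 1 as the noise level falls.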
+ + NOTE: "mask" is not used + """ + import torch + return torch.pow(nmask, (sigma ** settings.mask_blend_power) * settings.mask_blend_scale) + + +def apply_adaptive_masks( + settings: SoftInpaintingSettings, + nmask, + latent_orig, + latent_processed, + overlay_images, + width, height, + paste_to): + import torch + import modules.processing as proc + import modules.images as images + from PIL import Image, ImageOps, ImageFilter + + # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control. + latent_mask = nmask[0].float() + # convert the original mask into a form we use to scale distances for thresholding + mask_scalar = 1 - (torch.clamp(latent_mask, min=0, max=1) ** (settings.mask_blend_scale / 2)) + mask_scalar = (0.5 * (1 - settings.composite_mask_influence) + + mask_scalar * settings.composite_mask_influence) + mask_scalar = mask_scalar / (1.00001 - mask_scalar) + mask_scalar = mask_scalar.cpu().numpy() + + latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1) + + kernel, kernel_center = get_gaussian_kernel(stddev_radius=1.5, max_radius=2) + + masks_for_overlay = [] + + for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)): + converted_mask = distance_map.float().cpu().numpy() + converted_mask = weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.9, percentile_max=1, min_width=1) + converted_mask = weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.25, percentile_max=0.75, min_width=1) + + # The distance at which opacity of original decreases to 50% + half_weighted_distance = settings.composite_difference_threshold * mask_scalar + converted_mask = converted_mask / half_weighted_distance + + converted_mask = 1 / (1 + converted_mask ** settings.composite_difference_contrast) + converted_mask = smootherstep(converted_mask) + converted_mask = 1 - converted_mask + converted_mask = 255. * converted_mask + converted_mask = converted_mask.astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, width, height) + converted_mask = proc.create_binary_mask(converted_mask, round=False) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. + if paste_to is not None: + converted_mask = proc.uncrop(converted_mask, + (overlay_image.width, overlay_image.height), + paste_to) + + masks_for_overlay.append(converted_mask) + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + overlay_images[i] = image_masked.convert('RGBA') + + return masks_for_overlay + + +def apply_masks( + settings, + nmask, + overlay_images, + width, height, + paste_to): + import torch + import modules.processing as proc + import modules.images as images + from PIL import Image, ImageOps, ImageFilter + + converted_mask = nmask[0].float() + converted_mask = torch.clamp(converted_mask, min=0, max=1).pow_(settings.mask_blend_scale / 2) + converted_mask = 255. 
* converted_mask + converted_mask = converted_mask.cpu().numpy().astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, width, height) + converted_mask = proc.create_binary_mask(converted_mask, round=False) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. + if paste_to is not None: + converted_mask = proc.uncrop(converted_mask, + (width, height), + paste_to) + + masks_for_overlay = [] + + for i, overlay_image in enumerate(overlay_images): + masks_for_overlay[i] = converted_mask + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + overlay_images[i] = image_masked.convert('RGBA') + + return masks_for_overlay + + +def weighted_histogram_filter(img, kernel, kernel_center, percentile_min=0.0, percentile_max=1.0, min_width=1.0): + """ + Generalization convolution filter capable of applying + weighted mean, median, maximum, and minimum filters + parametrically using an arbitrary kernel. + + Args: + img (nparray): + The image, a 2-D array of floats, to which the filter is being applied. + kernel (nparray): + The kernel, a 2-D array of floats. + kernel_center (nparray): + The kernel center coordinate, a 1-D array with two elements. + percentile_min (float): + The lower bound of the histogram window used by the filter, + from 0 to 1. + percentile_max (float): + The upper bound of the histogram window used by the filter, + from 0 to 1. + min_width (float): + The minimum size of the histogram window bounds, in weight units. + Must be greater than 0. + + Returns: + (nparray): A filtered copy of the input image "img", a 2-D array of floats. + """ + + # Converts an index tuple into a vector. + def vec(x): + return np.array(x) + + kernel_min = -kernel_center + kernel_max = vec(kernel.shape) - kernel_center + + def weighted_histogram_filter_single(idx): + idx = vec(idx) + min_index = np.maximum(0, idx + kernel_min) + max_index = np.minimum(vec(img.shape), idx + kernel_max) + window_shape = max_index - min_index + + class WeightedElement: + """ + An element of the histogram, its weight + and bounds. + """ + + def __init__(self, value, weight): + self.value: float = value + self.weight: float = weight + self.window_min: float = 0.0 + self.window_max: float = 1.0 + + # Collect the values in the image as WeightedElements, + # weighted by their corresponding kernel values. + values = [] + for window_tup in np.ndindex(tuple(window_shape)): + window_index = vec(window_tup) + image_index = window_index + min_index + centered_kernel_index = image_index - idx + kernel_index = centered_kernel_index + kernel_center + element = WeightedElement(img[tuple(image_index)], kernel[tuple(kernel_index)]) + values.append(element) + + def sort_key(x: WeightedElement): + return x.value + + values.sort(key=sort_key) + + # Calculate the height of the stack (sum) + # and each sample's range they occupy in the stack + sum = 0 + for i in range(len(values)): + values[i].window_min = sum + sum += values[i].weight + values[i].window_max = sum + + # Calculate what range of this stack ("window") + # we want to get the weighted average across. 
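# A small worked example with assumed numbers: if percentile_min = 0.25,
# percentile_max = 0.75 and the summed kernel weight is 4.0, the window
# computed below spans [1.0, 3.0]; each sample then contributes to the
# weighted average in proportion to how much of its own weight range
# overlaps that interval.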
+ window_min = sum * percentile_min + window_max = sum * percentile_max + window_width = window_max - window_min + + # Ensure the window is within the stack and at least a certain size. + if window_width < min_width: + window_center = (window_min + window_max) / 2 + window_min = window_center - min_width / 2 + window_max = window_center + min_width / 2 + + if window_max > sum: + window_max = sum + window_min = sum - min_width + + if window_min < 0: + window_min = 0 + window_max = min_width + + value = 0 + value_weight = 0 + + # Get the weighted average of all the samples + # that overlap with the window, weighted + # by the size of their overlap. + for i in range(len(values)): + if window_min >= values[i].window_max: + continue + if window_max <= values[i].window_min: + break + + s = max(window_min, values[i].window_min) + e = min(window_max, values[i].window_max) + w = e - s + + value += values[i].value * w + value_weight += w + + return value / value_weight if value_weight != 0 else 0 + + img_out = img.copy() + + # Apply the kernel operation over each pixel. + for index in np.ndindex(img.shape): + img_out[index] = weighted_histogram_filter_single(index) + + return img_out + + +def smoothstep(x): + """ + The smoothstep function, input should be clamped to 0-1 range. + Turns a diagonal line (f(x) = x) into a sigmoid-like curve. + """ + return x * x * (3 - 2 * x) + + +def smootherstep(x): + """ + The smootherstep function, input should be clamped to 0-1 range. + Turns a diagonal line (f(x) = x) into a sigmoid-like curve. + """ + return x * x * x * (x * (6 * x - 15) + 10) + + +def get_gaussian_kernel(stddev_radius=1.0, max_radius=2): + """ + Creates a Gaussian kernel with thresholded edges. + + Args: + stddev_radius (float): + Standard deviation of the gaussian kernel, in pixels. + max_radius (int): + The size of the filter kernel. The number of pixels is (max_radius*2+1) ** 2. + The kernel is thresholded so that any values one pixel beyond this radius + is weighted at 0. + + Returns: + (nparray, nparray): A kernel array (shape: (N, N)), its center coordinate (shape: (2)) + """ + + # Evaluates a 0-1 normalized gaussian function for a given square distance from the mean. + def gaussian(sqr_mag): + return math.exp(-sqr_mag / (stddev_radius * stddev_radius)) + + # Helper function for converting a tuple to an array. + def vec(x): + return np.array(x) + + """ + Since a gaussian is unbounded, we need to limit ourselves + to a finite range. + We taper the ends off at the end of that range so they equal zero + while preserving the maximum value of 1 at the mean. 
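For example, with max_radius = 2 (the default here), the raw Gaussian value at squared distance (max_radius + 1) ** 2 = 9 is subtracted from every sample and the remainder is rescaled, so the kernel centre still evaluates to exactly 1 while anything at or beyond that cut-off distance is clamped to 0.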
+ """ + zero_radius = max_radius + 1.0 + gauss_zero = gaussian(zero_radius * zero_radius) + gauss_kernel_scale = 1 / (1 - gauss_zero) + + def gaussian_kernel_func(coordinate): + x = coordinate[0] ** 2.0 + coordinate[1] ** 2.0 + x = gaussian(x) + x -= gauss_zero + x *= gauss_kernel_scale + x = max(0.0, x) + return x + + size = max_radius * 2 + 1 + kernel_center = max_radius + kernel = np.zeros((size, size)) + + for index in np.ndindex(kernel.shape): + kernel[index] = gaussian_kernel_func(vec(index) - kernel_center) + + return kernel, kernel_center + + +# ------------------- Constants ------------------- + + +default = SoftInpaintingSettings(1, 0.5, 4, 0, 0.5, 2) + +enabled_ui_label = "Soft inpainting" +enabled_gen_param_label = "Soft inpainting enabled" +enabled_el_id = "soft_inpainting_enabled" + +ui_labels = SoftInpaintingSettings( + "Schedule bias", + "Preservation strength", + "Transition contrast boost", + "Mask influence", + "Difference threshold", + "Difference contrast") + +ui_info = SoftInpaintingSettings( + "Shifts when preservation of original content occurs during denoising.", + "How strongly partially masked content should be preserved.", + "Amplifies the contrast that may be lost in partially masked regions.", + "How strongly the original mask should bias the difference threshold.", + "How much an image region can change before the original pixels are not blended in anymore.", + "How sharp the transition should be between blended and not blended.") + +gen_param_labels = SoftInpaintingSettings( + "Soft inpainting schedule bias", + "Soft inpainting preservation strength", + "Soft inpainting transition contrast boost", + "Soft inpainting mask influence", + "Soft inpainting difference threshold", + "Soft inpainting difference contrast") + +el_ids = SoftInpaintingSettings( + "mask_blend_power", + "mask_blend_scale", + "inpaint_detail_preservation", + "composite_mask_influence", + "composite_difference_threshold", + "composite_difference_contrast") + + +# ------------------- Script ------------------- + + +class Script(scripts.Script): + def __init__(self): + self.section = "inpaint" + self.masks_for_overlay = None + self.overlay_images = None + + def title(self): + return "Soft Inpainting" + + def show(self, is_img2img): + return scripts.AlwaysVisible if is_img2img else False + + def ui(self, is_img2img): + if not is_img2img: + return + + with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled: + with gr.Group(): + gr.Markdown( + """ + Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity. + **High _Mask blur_** values are recommended! 
+ """) + + power = \ + gr.Slider(label=ui_labels.mask_blend_power, + info=ui_info.mask_blend_power, + minimum=0, + maximum=8, + step=0.1, + value=default.mask_blend_power, + elem_id=el_ids.mask_blend_power) + scale = \ + gr.Slider(label=ui_labels.mask_blend_scale, + info=ui_info.mask_blend_scale, + minimum=0, + maximum=8, + step=0.05, + value=default.mask_blend_scale, + elem_id=el_ids.mask_blend_scale) + detail = \ + gr.Slider(label=ui_labels.inpaint_detail_preservation, + info=ui_info.inpaint_detail_preservation, + minimum=1, + maximum=32, + step=0.5, + value=default.inpaint_detail_preservation, + elem_id=el_ids.inpaint_detail_preservation) + + gr.Markdown( + """ + ### Pixel Composite Settings + """) + + mask_inf = \ + gr.Slider(label=ui_labels.composite_mask_influence, + info=ui_info.composite_mask_influence, + minimum=0, + maximum=1, + step=0.05, + value=default.composite_mask_influence, + elem_id=el_ids.composite_mask_influence) + + dif_thresh = \ + gr.Slider(label=ui_labels.composite_difference_threshold, + info=ui_info.composite_difference_threshold, + minimum=0, + maximum=8, + step=0.25, + value=default.composite_difference_threshold, + elem_id=el_ids.composite_difference_threshold) + + dif_contr = \ + gr.Slider(label=ui_labels.composite_difference_contrast, + info=ui_info.composite_difference_contrast, + minimum=0, + maximum=8, + step=0.25, + value=default.composite_difference_contrast, + elem_id=el_ids.composite_difference_contrast) + + with gr.Accordion("Help", open=False): + gr.Markdown( + f""" + ### {ui_labels.mask_blend_power} + + The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas). + This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step. + This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation. + + - **Below 1**: Stronger preservation near the end (with low sigma) + - **1**: Balanced (proportional to sigma) + - **Above 1**: Stronger preservation in the beginning (with high sigma) + """) + gr.Markdown( + f""" + ### {ui_labels.mask_blend_scale} + + Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content. + This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength. + + - **Low values**: Favors generated content. + - **High values**: Favors original content. + """) + gr.Markdown( + f""" + ### {ui_labels.inpaint_detail_preservation} + + This parameter controls how the original latent vectors and denoised latent vectors are interpolated. + With higher values, the magnitude of the resulting blended vector will be closer to the maximum of the two interpolated vectors. + This can prevent the loss of contrast that occurs with linear interpolation. + + - **Low values**: Softer blending, details may fade. + - **High values**: Stronger contrast, may over-saturate colors. + """) + + gr.Markdown( + """ + ## Pixel Composite Settings + + Masks are generated based on how much a part of the image changed after denoising. + These masks are used to blend the original and final images together. + If the difference is low, the original pixels are used instead of the pixels returned by the inpainting process. + """) + + gr.Markdown( + f""" + ### {ui_labels.composite_mask_influence} + + This parameter controls how much the mask should bias this sensitivity to difference. 
+ + - **0**: Ignore the mask, only consider differences in image content. + - **1**: Follow the mask closely despite image content changes. + """) + + gr.Markdown( + f""" + ### {ui_labels.composite_difference_threshold} + + This value represents the difference at which the original pixels will have less than 50% opacity. + + - **Low values**: Two images patches must be almost the same in order to retain original pixels. + - **High values**: Two images patches can be very different and still retain original pixels. + """) + + gr.Markdown( + f""" + ### {ui_labels.composite_difference_contrast} + + This value represents the contrast between the opacity of the original and inpainted content. + + - **Low values**: The blend will be more gradual and have longer transitions, but may cause ghosting. + - **High values**: Ghosting will be less common, but transitions may be very sudden. + """) + + self.infotext_fields = [(soft_inpainting_enabled, enabled_gen_param_label), + (power, gen_param_labels.mask_blend_power), + (scale, gen_param_labels.mask_blend_scale), + (detail, gen_param_labels.inpaint_detail_preservation), + (mask_inf, gen_param_labels.composite_mask_influence), + (dif_thresh, gen_param_labels.composite_difference_threshold), + (dif_contr, gen_param_labels.composite_difference_contrast)] + + self.paste_field_names = [] + for _, field_name in self.infotext_fields: + self.paste_field_names.append(field_name) + + return [soft_inpainting_enabled, + power, + scale, + detail, + mask_inf, + dif_thresh, + dif_contr] + + def process(self, p, enabled, power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + # Shut off the rounding it normally does. + p.mask_round = False + + settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr) + + # p.extra_generation_params["Mask rounding"] = False + settings.add_generation_params(p.extra_generation_params) + + def on_mask_blend(self, p, mba: scripts.MaskBlendArgs, enabled, power, scale, detail_preservation, mask_inf, + dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + if mba.is_final_blend: + mba.blended_latent = mba.current_latent + return + + settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr) + + # todo: Why is sigma 2D? Both values are the same. + mba.blended_latent = latent_blend(settings, + mba.init_latent, + mba.current_latent, + get_modified_nmask(settings, mba.nmask, mba.sigma[0])) + + def post_sample(self, p, ps: scripts.PostSampleArgs, enabled, power, scale, detail_preservation, mask_inf, + dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + nmask = getattr(p, "nmask", None) + if nmask is None: + return + + from modules import images + from modules.shared import opts + + settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr) + + # since the original code puts holes in the existing overlay images, + # we have to rebuild them. 
+ self.overlay_images = [] + for img in p.init_images: + + image = images.flatten(img, opts.img2img_background_color) + + if p.paste_to is None and p.resize_mode != 3: + image = images.resize_image(p.resize_mode, image, p.width, p.height) + + self.overlay_images.append(image.convert('RGBA')) + + if len(p.init_images) == 1: + self.overlay_images = self.overlay_images * p.batch_size + + if getattr(ps.samples, 'already_decoded', False): + self.masks_for_overlay = apply_masks(settings=settings, + nmask=nmask, + overlay_images=self.overlay_images, + width=p.width, + height=p.height, + paste_to=p.paste_to) + else: + self.masks_for_overlay = apply_adaptive_masks(settings=settings, + nmask=nmask, + latent_orig=p.init_latent, + latent_processed=ps.samples, + overlay_images=self.overlay_images, + width=p.width, + height=p.height, + paste_to=p.paste_to) + + def postprocess_maskoverlay(self, p, ppmo: scripts.PostProcessMaskOverlayArgs, enabled, power, scale, + detail_preservation, mask_inf, dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + if self.masks_for_overlay is None: + return + + if self.overlay_images is None: + return + + ppmo.mask_for_overlay = self.masks_for_overlay[ppmo.index] + ppmo.overlay_image = self.overlay_images[ppmo.index] -- cgit v1.2.3 From 59d060fd5ea93fcc3fdbfbd13b6e20fda06ecf94 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 30 Dec 2023 17:11:03 +0900 Subject: More lora not found warning --- extensions-builtin/Lora/networks.py | 8 +++++++- extensions-builtin/Lora/scripts/lora_script.py | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 985b2753..72ebd624 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -1,3 +1,4 @@ +import gradio as gr import logging import os import re @@ -314,7 +315,12 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No emb_db.skipped_embeddings[name] = embedding if failed_to_load_networks: - sd_hijack.model_hijack.comments.append("Networks not found: " + ", ".join(failed_to_load_networks)) + lora_not_found_message = f'Lora not found: {", ".join(failed_to_load_networks)}' + sd_hijack.model_hijack.comments.append(lora_not_found_message) + if shared.opts.lora_not_found_warning_console: + print(f'\n{lora_not_found_message}\n') + if shared.opts.lora_not_found_gradio_warning: + gr.Warning(lora_not_found_message) purge_networks_from_memory() diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py index ef23968c..1518f7e5 100644 --- a/extensions-builtin/Lora/scripts/lora_script.py +++ b/extensions-builtin/Lora/scripts/lora_script.py @@ -39,6 +39,8 @@ shared.options_templates.update(shared.options_section(('extra_networks', "Extra "lora_show_all": shared.OptionInfo(False, "Always show all networks on the Lora page").info("otherwise, those detected as for incompatible version of Stable Diffusion will be hidden"), "lora_hide_unknown_for_versions": shared.OptionInfo([], "Hide networks of unknown versions for model versions", gr.CheckboxGroup, {"choices": ["SD1", "SD2", "SDXL"]}), "lora_in_memory_limit": shared.OptionInfo(0, "Number of Lora networks to keep cached in memory", gr.Number, {"precision": 0}), + "lora_not_found_warning_console": shared.OptionInfo(False, "Lora not found warning in console"), + 
"lora_not_found_gradio_warning": shared.OptionInfo(False, "Lora not found warning popup in webui"), })) -- cgit v1.2.3 From b0f59342346b1c8b405f97c0e0bb01c6ae05c601 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 25 Dec 2023 14:43:51 +0200 Subject: Use Spandrel for upscaling and face restoration architectures (aside from GFPGAN and LDSR) --- extensions-builtin/ScuNET/scripts/scunet_model.py | 13 +- extensions-builtin/ScuNET/scunet_model_arch.py | 268 ------ extensions-builtin/SwinIR/scripts/swinir_model.py | 126 ++- extensions-builtin/SwinIR/swinir_model_arch.py | 867 ------------------ extensions-builtin/SwinIR/swinir_model_arch_v2.py | 1017 --------------------- 5 files changed, 62 insertions(+), 2229 deletions(-) delete mode 100644 extensions-builtin/ScuNET/scunet_model_arch.py delete mode 100644 extensions-builtin/SwinIR/swinir_model_arch.py delete mode 100644 extensions-builtin/SwinIR/swinir_model_arch_v2.py (limited to 'extensions-builtin') diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py index 167d2f64..18cf8e1a 100644 --- a/extensions-builtin/ScuNET/scripts/scunet_model.py +++ b/extensions-builtin/ScuNET/scripts/scunet_model.py @@ -7,9 +7,7 @@ from tqdm import tqdm import modules.upscaler from modules import devices, modelloader, script_callbacks, errors -from scunet_model_arch import SCUNet -from modules.modelloader import load_file_from_url from modules.shared import opts @@ -120,17 +118,10 @@ class UpscalerScuNET(modules.upscaler.Upscaler): device = devices.get_device_for('scunet') if path.startswith("http"): # TODO: this doesn't use `path` at all? - filename = load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name=f"{self.name}.pth") + filename = modelloader.load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name=f"{self.name}.pth") else: filename = path - model = SCUNet(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64) - model.load_state_dict(torch.load(filename), strict=True) - model.eval() - for _, v in model.named_parameters(): - v.requires_grad = False - model = model.to(device) - - return model + return modelloader.load_spandrel_model(filename, device=device) def on_ui_settings(): diff --git a/extensions-builtin/ScuNET/scunet_model_arch.py b/extensions-builtin/ScuNET/scunet_model_arch.py deleted file mode 100644 index b51a8806..00000000 --- a/extensions-builtin/ScuNET/scunet_model_arch.py +++ /dev/null @@ -1,268 +0,0 @@ -# -*- coding: utf-8 -*- -import numpy as np -import torch -import torch.nn as nn -from einops import rearrange -from einops.layers.torch import Rearrange -from timm.models.layers import trunc_normal_, DropPath - - -class WMSA(nn.Module): - """ Self-attention module in Swin Transformer - """ - - def __init__(self, input_dim, output_dim, head_dim, window_size, type): - super(WMSA, self).__init__() - self.input_dim = input_dim - self.output_dim = output_dim - self.head_dim = head_dim - self.scale = self.head_dim ** -0.5 - self.n_heads = input_dim // head_dim - self.window_size = window_size - self.type = type - self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True) - - self.relative_position_params = nn.Parameter( - torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads)) - - self.linear = nn.Linear(self.input_dim, self.output_dim) - - trunc_normal_(self.relative_position_params, std=.02) - self.relative_position_params = torch.nn.Parameter( - self.relative_position_params.view(2 * window_size - 
1, 2 * window_size - 1, self.n_heads).transpose(1, - 2).transpose( - 0, 1)) - - def generate_mask(self, h, w, p, shift): - """ generating the mask of SW-MSA - Args: - shift: shift parameters in CyclicShift. - Returns: - attn_mask: should be (1 1 w p p), - """ - # supporting square. - attn_mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device) - if self.type == 'W': - return attn_mask - - s = p - shift - attn_mask[-1, :, :s, :, s:, :] = True - attn_mask[-1, :, s:, :, :s, :] = True - attn_mask[:, -1, :, :s, :, s:] = True - attn_mask[:, -1, :, s:, :, :s] = True - attn_mask = rearrange(attn_mask, 'w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)') - return attn_mask - - def forward(self, x): - """ Forward pass of Window Multi-head Self-attention module. - Args: - x: input tensor with shape of [b h w c]; - attn_mask: attention mask, fill -inf where the value is True; - Returns: - output: tensor shape [b h w c] - """ - if self.type != 'W': - x = torch.roll(x, shifts=(-(self.window_size // 2), -(self.window_size // 2)), dims=(1, 2)) - - x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size) - h_windows = x.size(1) - w_windows = x.size(2) - # square validation - # assert h_windows == w_windows - - x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size) - qkv = self.embedding_layer(x) - q, k, v = rearrange(qkv, 'b nw np (threeh c) -> threeh b nw np c', c=self.head_dim).chunk(3, dim=0) - sim = torch.einsum('hbwpc,hbwqc->hbwpq', q, k) * self.scale - # Adding learnable relative embedding - sim = sim + rearrange(self.relative_embedding(), 'h p q -> h 1 1 p q') - # Using Attn Mask to distinguish different subwindows. - if self.type != 'W': - attn_mask = self.generate_mask(h_windows, w_windows, self.window_size, shift=self.window_size // 2) - sim = sim.masked_fill_(attn_mask, float("-inf")) - - probs = nn.functional.softmax(sim, dim=-1) - output = torch.einsum('hbwij,hbwjc->hbwic', probs, v) - output = rearrange(output, 'h b w p c -> b w p (h c)') - output = self.linear(output) - output = rearrange(output, 'b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c', w1=h_windows, p1=self.window_size) - - if self.type != 'W': - output = torch.roll(output, shifts=(self.window_size // 2, self.window_size // 2), dims=(1, 2)) - - return output - - def relative_embedding(self): - cord = torch.tensor(np.array([[i, j] for i in range(self.window_size) for j in range(self.window_size)])) - relation = cord[:, None, :] - cord[None, :, :] + self.window_size - 1 - # negative is allowed - return self.relative_position_params[:, relation[:, :, 0].long(), relation[:, :, 1].long()] - - -class Block(nn.Module): - def __init__(self, input_dim, output_dim, head_dim, window_size, drop_path, type='W', input_resolution=None): - """ SwinTransformer Block - """ - super(Block, self).__init__() - self.input_dim = input_dim - self.output_dim = output_dim - assert type in ['W', 'SW'] - self.type = type - if input_resolution <= window_size: - self.type = 'W' - - self.ln1 = nn.LayerNorm(input_dim) - self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type) - self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() - self.ln2 = nn.LayerNorm(input_dim) - self.mlp = nn.Sequential( - nn.Linear(input_dim, 4 * input_dim), - nn.GELU(), - nn.Linear(4 * input_dim, output_dim), - ) - - def forward(self, x): - x = x + self.drop_path(self.msa(self.ln1(x))) - x = x + self.drop_path(self.mlp(self.ln2(x))) - return x - - -class ConvTransBlock(nn.Module): - def __init__(self, conv_dim, trans_dim, head_dim, window_size, drop_path, type='W', input_resolution=None): - """ SwinTransformer and Conv Block - """ - super(ConvTransBlock, self).__init__() - self.conv_dim = conv_dim - self.trans_dim = trans_dim - self.head_dim = head_dim - self.window_size = window_size - self.drop_path = drop_path - self.type = type - self.input_resolution = input_resolution - - assert self.type in ['W', 'SW'] - if self.input_resolution <= self.window_size: - self.type = 'W' - - self.trans_block = Block(self.trans_dim, self.trans_dim, self.head_dim, self.window_size, self.drop_path, - self.type, self.input_resolution) - self.conv1_1 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True) - self.conv1_2 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True) - - self.conv_block = nn.Sequential( - nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False), - nn.ReLU(True), - nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False) - ) - - def forward(self, x): - conv_x, trans_x = torch.split(self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1) - conv_x = self.conv_block(conv_x) + conv_x - trans_x = Rearrange('b c h w -> b h w c')(trans_x) - trans_x = self.trans_block(trans_x) - trans_x = Rearrange('b h w c -> b c h w')(trans_x) - res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1)) - x = x + res - - return x - - -class SCUNet(nn.Module): - # def __init__(self, in_nc=3, config=[2, 2, 2, 2, 2, 2, 2], dim=64, drop_path_rate=0.0, input_resolution=256): - def __init__(self, in_nc=3, config=None, dim=64, drop_path_rate=0.0, input_resolution=256): - super(SCUNet, self).__init__() - if config is None: - config = [2, 2, 2, 2, 2, 2, 2] - self.config = config - self.dim = dim - self.head_dim = 32 - self.window_size = 8 - - # drop path rate for each layer - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))] - - self.m_head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)] - - begin = 0 - self.m_down1 = [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 else 'SW', input_resolution) - for i in range(config[0])] + \ - [nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False)] - - begin += config[0] - self.m_down2 = [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 else 'SW', input_resolution // 2) - for i in range(config[1])] + \ - [nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False)] - - begin += config[1] - self.m_down3 = [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 else 'SW', input_resolution // 4) - for i in range(config[2])] + \ - [nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False)] - - begin += config[2] - self.m_body = [ConvTransBlock(4 * dim, 4 * dim, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 else 'SW', input_resolution // 8) - for i in range(config[3])] - - begin += config[3] - self.m_up3 = [nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False), ] + \ - [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 
else 'SW', input_resolution // 4) - for i in range(config[4])] - - begin += config[4] - self.m_up2 = [nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False), ] + \ - [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 else 'SW', input_resolution // 2) - for i in range(config[5])] - - begin += config[5] - self.m_up1 = [nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False), ] + \ - [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin], - 'W' if not i % 2 else 'SW', input_resolution) - for i in range(config[6])] - - self.m_tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)] - - self.m_head = nn.Sequential(*self.m_head) - self.m_down1 = nn.Sequential(*self.m_down1) - self.m_down2 = nn.Sequential(*self.m_down2) - self.m_down3 = nn.Sequential(*self.m_down3) - self.m_body = nn.Sequential(*self.m_body) - self.m_up3 = nn.Sequential(*self.m_up3) - self.m_up2 = nn.Sequential(*self.m_up2) - self.m_up1 = nn.Sequential(*self.m_up1) - self.m_tail = nn.Sequential(*self.m_tail) - # self.apply(self._init_weights) - - def forward(self, x0): - - h, w = x0.size()[-2:] - paddingBottom = int(np.ceil(h / 64) * 64 - h) - paddingRight = int(np.ceil(w / 64) * 64 - w) - x0 = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x0) - - x1 = self.m_head(x0) - x2 = self.m_down1(x1) - x3 = self.m_down2(x2) - x4 = self.m_down3(x3) - x = self.m_body(x4) - x = self.m_up3(x + x4) - x = self.m_up2(x + x3) - x = self.m_up1(x + x2) - x = self.m_tail(x + x1) - - x = x[..., :h, :w] - - return x - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) - if m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py index ae0d0e6a..85c18b9e 100644 --- a/extensions-builtin/SwinIR/scripts/swinir_model.py +++ b/extensions-builtin/SwinIR/scripts/swinir_model.py @@ -1,5 +1,5 @@ +import logging import sys -import platform import numpy as np import torch @@ -8,13 +8,11 @@ from tqdm import tqdm from modules import modelloader, devices, script_callbacks, shared from modules.shared import opts, state -from swinir_model_arch import SwinIR -from swinir_model_arch_v2 import Swin2SR from modules.upscaler import Upscaler, UpscalerData SWINIR_MODEL_URL = "https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth" -device_swinir = devices.get_device_for('swinir') +logger = logging.getLogger(__name__) class UpscalerSwinIR(Upscaler): @@ -37,26 +35,29 @@ class UpscalerSwinIR(Upscaler): scalers.append(model_data) self.scalers = scalers - def do_upscale(self, img, model_file): - use_compile = hasattr(opts, 'SWIN_torch_compile') and opts.SWIN_torch_compile \ - and int(torch.__version__.split('.')[0]) >= 2 and platform.system() != "Windows" + def do_upscale(self, img: Image.Image, model_file: str) -> Image.Image: current_config = (model_file, opts.SWIN_tile) - if use_compile and self._cached_model_config == current_config: + device = self._get_device() + + if self._cached_model_config == current_config: model = self._cached_model else: - self._cached_model = None try: model = self.load_model(model_file) except Exception as e: print(f"Failed loading SwinIR model {model_file}: {e}", file=sys.stderr) return img - model = model.to(device_swinir, dtype=devices.dtype) - if 
use_compile: - model = torch.compile(model) - self._cached_model = model - self._cached_model_config = current_config - img = upscale(img, model) + self._cached_model = model + self._cached_model_config = current_config + + img = upscale( + img, + model, + tile=opts.SWIN_tile, + tile_overlap=opts.SWIN_tile_overlap, + device=device, + ) devices.torch_gc() return img @@ -69,69 +70,54 @@ class UpscalerSwinIR(Upscaler): ) else: filename = path - if filename.endswith(".v2.pth"): - model = Swin2SR( - upscale=scale, - in_chans=3, - img_size=64, - window_size=8, - img_range=1.0, - depths=[6, 6, 6, 6, 6, 6], - embed_dim=180, - num_heads=[6, 6, 6, 6, 6, 6], - mlp_ratio=2, - upsampler="nearest+conv", - resi_connection="1conv", - ) - params = None - else: - model = SwinIR( - upscale=scale, - in_chans=3, - img_size=64, - window_size=8, - img_range=1.0, - depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], - embed_dim=240, - num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8], - mlp_ratio=2, - upsampler="nearest+conv", - resi_connection="3conv", - ) - params = "params_ema" - pretrained_model = torch.load(filename) - if params is not None: - model.load_state_dict(pretrained_model[params], strict=True) - else: - model.load_state_dict(pretrained_model, strict=True) + model = modelloader.load_spandrel_model( + filename, + device=self._get_device(), + dtype=devices.dtype, + ) + if getattr(opts, 'SWIN_torch_compile', False): + try: + model = torch.compile(model) + except Exception: + logger.warning("Failed to compile SwinIR model, fallback to JIT", exc_info=True) return model + def _get_device(self): + return devices.get_device_for('swinir') + def upscale( - img, - model, - tile=None, - tile_overlap=None, - window_size=8, - scale=4, + img, + model, + *, + tile: int, + tile_overlap: int, + window_size=8, + scale=4, + device, ): - tile = tile or opts.SWIN_tile - tile_overlap = tile_overlap or opts.SWIN_tile_overlap - img = np.array(img) img = img[:, :, ::-1] img = np.moveaxis(img, 2, 0) / 255 img = torch.from_numpy(img).float() - img = img.unsqueeze(0).to(device_swinir, dtype=devices.dtype) + img = img.unsqueeze(0).to(device, dtype=devices.dtype) with torch.no_grad(), devices.autocast(): _, _, h_old, w_old = img.size() h_pad = (h_old // window_size + 1) * window_size - h_old w_pad = (w_old // window_size + 1) * window_size - w_old img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, : h_old + h_pad, :] img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, : w_old + w_pad] - output = inference(img, model, tile, tile_overlap, window_size, scale) + output = inference( + img, + model, + tile=tile, + tile_overlap=tile_overlap, + window_size=window_size, + scale=scale, + device=device, + ) output = output[..., : h_old * scale, : w_old * scale] output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy() if output.ndim == 3: @@ -142,7 +128,16 @@ def upscale( return Image.fromarray(output, "RGB") -def inference(img, model, tile, tile_overlap, window_size, scale): +def inference( + img, + model, + *, + tile: int, + tile_overlap: int, + window_size: int, + scale: int, + device, +): # test the image tile by tile b, c, h, w = img.size() tile = min(tile, h, w) @@ -152,8 +147,8 @@ def inference(img, model, tile, tile_overlap, window_size, scale): stride = tile - tile_overlap h_idx_list = list(range(0, h - tile, stride)) + [h - tile] w_idx_list = list(range(0, w - tile, stride)) + [w - tile] - E = torch.zeros(b, c, h * sf, w * sf, dtype=devices.dtype, device=device_swinir).type_as(img) - W = torch.zeros_like(E, dtype=devices.dtype, 
device=device_swinir) + E = torch.zeros(b, c, h * sf, w * sf, dtype=devices.dtype, device=device).type_as(img) + W = torch.zeros_like(E, dtype=devices.dtype, device=device) with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="SwinIR tiles") as pbar: for h_idx in h_idx_list: @@ -185,8 +180,7 @@ def on_ui_settings(): shared.opts.add_option("SWIN_tile", shared.OptionInfo(192, "Tile size for all SwinIR.", gr.Slider, {"minimum": 16, "maximum": 512, "step": 16}, section=('upscaling', "Upscaling"))) shared.opts.add_option("SWIN_tile_overlap", shared.OptionInfo(8, "Tile overlap, in pixels for SwinIR. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}, section=('upscaling', "Upscaling"))) - if int(torch.__version__.split('.')[0]) >= 2 and platform.system() != "Windows": # torch.compile() require pytorch 2.0 or above, and not on Windows - shared.opts.add_option("SWIN_torch_compile", shared.OptionInfo(False, "Use torch.compile to accelerate SwinIR.", gr.Checkbox, {"interactive": True}, section=('upscaling', "Upscaling")).info("Takes longer on first run")) + shared.opts.add_option("SWIN_torch_compile", shared.OptionInfo(False, "Use torch.compile to accelerate SwinIR.", gr.Checkbox, {"interactive": True}, section=('upscaling', "Upscaling")).info("Takes longer on first run")) script_callbacks.on_ui_settings(on_ui_settings) diff --git a/extensions-builtin/SwinIR/swinir_model_arch.py b/extensions-builtin/SwinIR/swinir_model_arch.py deleted file mode 100644 index 93b93274..00000000 --- a/extensions-builtin/SwinIR/swinir_model_arch.py +++ /dev/null @@ -1,867 +0,0 @@ -# ----------------------------------------------------------------------------------- -# SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257 -# Originally Written by Ze Liu, Modified by Jingyun Liang. 
-# ----------------------------------------------------------------------------------- - -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as checkpoint -from timm.models.layers import DropPath, to_2tuple, trunc_normal_ - - -class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -def window_partition(x, window_size): - """ - Args: - x: (B, H, W, C) - window_size (int): window size - - Returns: - windows: (num_windows*B, window_size, window_size, C) - """ - B, H, W, C = x.shape - x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) - windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) - return windows - - -def window_reverse(windows, window_size, H, W): - """ - Args: - windows: (num_windows*B, window_size, window_size, C) - window_size (int): Window size - H (int): Height of image - W (int): Width of image - - Returns: - x: (B, H, W, C) - """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) - x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) - return x - - -class WindowAttention(nn.Module): - r""" Window based multi-head self attention (W-MSA) module with relative position bias. - It supports both of shifted and non-shifted window. - - Args: - dim (int): Number of input channels. - window_size (tuple[int]): The height and width of the window. - num_heads (int): Number of attention heads. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set - attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 - proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 - """ - - def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): - - super().__init__() - self.dim = dim - self.window_size = window_size # Wh, Ww - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 - - # define a parameter table of relative position bias - self.relative_position_bias_table = nn.Parameter( - torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(self.window_size[0]) - coords_w = torch.arange(self.window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += self.window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 - relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - self.register_buffer("relative_position_index", relative_position_index) - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - - self.proj_drop = nn.Dropout(proj_drop) - - trunc_normal_(self.relative_position_bias_table, std=.02) - self.softmax = nn.Softmax(dim=-1) - - def forward(self, x, mask=None): - """ - Args: - x: input features with shape of (num_windows*B, N, C) - mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None - """ - B_, N, C = x.shape - qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) - - q = q * self.scale - attn = (q @ k.transpose(-2, -1)) - - relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( - self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - attn = attn + relative_position_bias.unsqueeze(0) - - if mask is not None: - nW = mask.shape[0] - attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) - attn = attn.view(-1, self.num_heads, N, N) - attn = self.softmax(attn) - else: - attn = self.softmax(attn) - - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B_, N, C) - x = self.proj(x) - x = self.proj_drop(x) - return x - - def extra_repr(self) -> str: - return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' - - def flops(self, N): - # calculate flops for 1 window with token length of N - flops = 0 - # qkv = self.qkv(x) - flops += N * self.dim * 3 * self.dim - # attn = (q @ k.transpose(-2, -1)) - flops += self.num_heads * N * (self.dim // self.num_heads) * N - # x = (attn @ v) - flops += self.num_heads * N * N * (self.dim // self.num_heads) - # x = self.proj(x) - flops += N * self.dim * self.dim - return flops - - -class SwinTransformerBlock(nn.Module): - r""" Swin Transformer Block. - - Args: - dim (int): Number of input channels. - input_resolution (tuple[int]): Input resolution. 
- num_heads (int): Number of attention heads. - window_size (int): Window size. - shift_size (int): Shift size for SW-MSA. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float, optional): Stochastic depth rate. Default: 0.0 - act_layer (nn.Module, optional): Activation layer. Default: nn.GELU - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - """ - - def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, - mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., - act_layer=nn.GELU, norm_layer=nn.LayerNorm): - super().__init__() - self.dim = dim - self.input_resolution = input_resolution - self.num_heads = num_heads - self.window_size = window_size - self.shift_size = shift_size - self.mlp_ratio = mlp_ratio - if min(self.input_resolution) <= self.window_size: - # if window size is larger than input resolution, we don't partition windows - self.shift_size = 0 - self.window_size = min(self.input_resolution) - assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" - - self.norm1 = norm_layer(dim) - self.attn = WindowAttention( - dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, - qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) - - self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - if self.shift_size > 0: - attn_mask = self.calculate_mask(self.input_resolution) - else: - attn_mask = None - - self.register_buffer("attn_mask", attn_mask) - - def calculate_mask(self, x_size): - # calculate attention mask for SW-MSA - H, W = x_size - img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 - h_slices = (slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None)) - w_slices = (slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None)) - cnt = 0 - for h in h_slices: - for w in w_slices: - img_mask[:, h, w, :] = cnt - cnt += 1 - - mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 - mask_windows = mask_windows.view(-1, self.window_size * self.window_size) - attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) - - return attn_mask - - def forward(self, x, x_size): - H, W = x_size - B, L, C = x.shape - # assert L == H * W, "input feature has wrong size" - - shortcut = x - x = self.norm1(x) - x = x.view(B, H, W, C) - - # cyclic shift - if self.shift_size > 0: - shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) - else: - shifted_x = x - - # partition windows - x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C - x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C - - # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of 
window size - if self.input_resolution == x_size: - attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C - else: - attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device)) - - # merge windows - attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) - shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C - - # reverse cyclic shift - if self.shift_size > 0: - x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) - else: - x = shifted_x - x = x.view(B, H * W, C) - - # FFN - x = shortcut + self.drop_path(x) - x = x + self.drop_path(self.mlp(self.norm2(x))) - - return x - - def extra_repr(self) -> str: - return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ - f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" - - def flops(self): - flops = 0 - H, W = self.input_resolution - # norm1 - flops += self.dim * H * W - # W-MSA/SW-MSA - nW = H * W / self.window_size / self.window_size - flops += nW * self.attn.flops(self.window_size * self.window_size) - # mlp - flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio - # norm2 - flops += self.dim * H * W - return flops - - -class PatchMerging(nn.Module): - r""" Patch Merging Layer. - - Args: - input_resolution (tuple[int]): Resolution of input feature. - dim (int): Number of input channels. - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - """ - - def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): - super().__init__() - self.input_resolution = input_resolution - self.dim = dim - self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) - self.norm = norm_layer(4 * dim) - - def forward(self, x): - """ - x: B, H*W, C - """ - H, W = self.input_resolution - B, L, C = x.shape - assert L == H * W, "input feature has wrong size" - assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." - - x = x.view(B, H, W, C) - - x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C - x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C - x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C - x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C - x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C - x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C - - x = self.norm(x) - x = self.reduction(x) - - return x - - def extra_repr(self) -> str: - return f"input_resolution={self.input_resolution}, dim={self.dim}" - - def flops(self): - H, W = self.input_resolution - flops = H * W * self.dim - flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim - return flops - - -class BasicLayer(nn.Module): - """ A basic Swin Transformer layer for one stage. - - Args: - dim (int): Number of input channels. - input_resolution (tuple[int]): Input resolution. - depth (int): Number of blocks. - num_heads (int): Number of attention heads. - window_size (int): Local window size. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - downsample (nn.Module | None, optional): Downsample layer at the end of the layer. 
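For the shifted-window blocks, calculate_mask above assigns a region id to every position of the cyclically shifted image and blocks attention between tokens from different regions with a -100 offset before the softmax; the forward pass reuses the precomputed mask when the input matches input_resolution and recomputes it otherwise. A self-contained version of that mask computation, using a toy 8x8 resolution:

    import torch

    def sw_msa_mask(H, W, window_size, shift_size):
        # region ids, as in the deleted calculate_mask()
        img_mask = torch.zeros((1, H, W, 1))
        slices = (slice(0, -window_size),
                  slice(-window_size, -shift_size),
                  slice(-shift_size, None))
        cnt = 0
        for h in slices:
            for w in slices:
                img_mask[:, h, w, :] = cnt
                cnt += 1
        # partition into windows and mask token pairs coming from different regions
        m = img_mask.view(1, H // window_size, window_size, W // window_size, window_size, 1)
        m = m.permute(0, 1, 3, 2, 4, 5).reshape(-1, window_size * window_size)
        attn_mask = m.unsqueeze(1) - m.unsqueeze(2)
        return attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)

    mask = sw_msa_mask(8, 8, window_size=4, shift_size=2)
    print(mask.shape, (mask == -100.0).any().item())   # torch.Size([4, 16, 16]) True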
Default: None - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. - """ - - def __init__(self, dim, input_resolution, depth, num_heads, window_size, - mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False): - - super().__init__() - self.dim = dim - self.input_resolution = input_resolution - self.depth = depth - self.use_checkpoint = use_checkpoint - - # build blocks - self.blocks = nn.ModuleList([ - SwinTransformerBlock(dim=dim, input_resolution=input_resolution, - num_heads=num_heads, window_size=window_size, - shift_size=0 if (i % 2 == 0) else window_size // 2, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop, attn_drop=attn_drop, - drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, - norm_layer=norm_layer) - for i in range(depth)]) - - # patch merging layer - if downsample is not None: - self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) - else: - self.downsample = None - - def forward(self, x, x_size): - for blk in self.blocks: - if self.use_checkpoint: - x = checkpoint.checkpoint(blk, x, x_size) - else: - x = blk(x, x_size) - if self.downsample is not None: - x = self.downsample(x) - return x - - def extra_repr(self) -> str: - return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" - - def flops(self): - flops = 0 - for blk in self.blocks: - flops += blk.flops() - if self.downsample is not None: - flops += self.downsample.flops() - return flops - - -class RSTB(nn.Module): - """Residual Swin Transformer Block (RSTB). - - Args: - dim (int): Number of input channels. - input_resolution (tuple[int]): Input resolution. - depth (int): Number of blocks. - num_heads (int): Number of attention heads. - window_size (int): Local window size. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. - img_size: Input image size. - patch_size: Patch size. - resi_connection: The convolutional block before residual connection. 
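PatchMerging, the optional downsample module passed to BasicLayer above, halves the resolution by concatenating each 2x2 neighbourhood along the channel axis (4C channels at half resolution) and projecting back to 2C with a linear layer; in this v1 file the LayerNorm runs before the reduction. A shape-level sketch with arbitrary sizes:

    import torch
    import torch.nn as nn

    B, H, W, C = 1, 8, 8, 96                     # arbitrary sizes; H and W must be even
    x = torch.randn(B, H * W, C)

    norm = nn.LayerNorm(4 * C)
    reduction = nn.Linear(4 * C, 2 * C, bias=False)

    x = x.view(B, H, W, C)
    x = torch.cat([x[:, 0::2, 0::2], x[:, 1::2, 0::2], x[:, 0::2, 1::2], x[:, 1::2, 1::2]], -1)
    x = x.view(B, -1, 4 * C)                     # B, (H/2)*(W/2), 4C
    x = reduction(norm(x))                       # B, (H/2)*(W/2), 2C  (v1 order: norm first)
    print(x.shape)                               # torch.Size([1, 16, 192])

SwinIR itself keeps the spatial resolution fixed, so PatchMerging is defined here but every BasicLayer in this file is constructed with downsample=None.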
- """ - - def __init__(self, dim, input_resolution, depth, num_heads, window_size, - mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, - img_size=224, patch_size=4, resi_connection='1conv'): - super(RSTB, self).__init__() - - self.dim = dim - self.input_resolution = input_resolution - - self.residual_group = BasicLayer(dim=dim, - input_resolution=input_resolution, - depth=depth, - num_heads=num_heads, - window_size=window_size, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop, attn_drop=attn_drop, - drop_path=drop_path, - norm_layer=norm_layer, - downsample=downsample, - use_checkpoint=use_checkpoint) - - if resi_connection == '1conv': - self.conv = nn.Conv2d(dim, dim, 3, 1, 1) - elif resi_connection == '3conv': - # to save parameters and memory - self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(dim // 4, dim, 3, 1, 1)) - - self.patch_embed = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim, - norm_layer=None) - - self.patch_unembed = PatchUnEmbed( - img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim, - norm_layer=None) - - def forward(self, x, x_size): - return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x - - def flops(self): - flops = 0 - flops += self.residual_group.flops() - H, W = self.input_resolution - flops += H * W * self.dim * self.dim * 9 - flops += self.patch_embed.flops() - flops += self.patch_unembed.flops() - - return flops - - -class PatchEmbed(nn.Module): - r""" Image to Patch Embedding - - Args: - img_size (int): Image size. Default: 224. - patch_size (int): Patch token size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. - embed_dim (int): Number of linear projection output channels. Default: 96. - norm_layer (nn.Module, optional): Normalization layer. Default: None - """ - - def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] - self.img_size = img_size - self.patch_size = patch_size - self.patches_resolution = patches_resolution - self.num_patches = patches_resolution[0] * patches_resolution[1] - - self.in_chans = in_chans - self.embed_dim = embed_dim - - if norm_layer is not None: - self.norm = norm_layer(embed_dim) - else: - self.norm = None - - def forward(self, x): - x = x.flatten(2).transpose(1, 2) # B Ph*Pw C - if self.norm is not None: - x = self.norm(x) - return x - - def flops(self): - flops = 0 - H, W = self.img_size - if self.norm is not None: - flops += H * W * self.embed_dim - return flops - - -class PatchUnEmbed(nn.Module): - r""" Image to Patch Unembedding - - Args: - img_size (int): Image size. Default: 224. - patch_size (int): Patch token size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. - embed_dim (int): Number of linear projection output channels. Default: 96. - norm_layer (nn.Module, optional): Normalization layer. 
Default: None - """ - - def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] - self.img_size = img_size - self.patch_size = patch_size - self.patches_resolution = patches_resolution - self.num_patches = patches_resolution[0] * patches_resolution[1] - - self.in_chans = in_chans - self.embed_dim = embed_dim - - def forward(self, x, x_size): - B, HW, C = x.shape - x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C - return x - - def flops(self): - flops = 0 - return flops - - -class Upsample(nn.Sequential): - """Upsample module. - - Args: - scale (int): Scale factor. Supported scales: 2^n and 3. - num_feat (int): Channel number of intermediate features. - """ - - def __init__(self, scale, num_feat): - m = [] - if (scale & (scale - 1)) == 0: # scale = 2^n - for _ in range(int(math.log(scale, 2))): - m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) - m.append(nn.PixelShuffle(2)) - elif scale == 3: - m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) - m.append(nn.PixelShuffle(3)) - else: - raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.') - super(Upsample, self).__init__(*m) - - -class UpsampleOneStep(nn.Sequential): - """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) - Used in lightweight SR to save parameters. - - Args: - scale (int): Scale factor. Supported scales: 2^n and 3. - num_feat (int): Channel number of intermediate features. - - """ - - def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): - self.num_feat = num_feat - self.input_resolution = input_resolution - m = [] - m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1)) - m.append(nn.PixelShuffle(scale)) - super(UpsampleOneStep, self).__init__(*m) - - def flops(self): - H, W = self.input_resolution - flops = H * W * self.num_feat * 3 * 9 - return flops - - -class SwinIR(nn.Module): - r""" SwinIR - A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer. - - Args: - img_size (int | tuple(int)): Input image size. Default 64 - patch_size (int | tuple(int)): Patch size. Default: 1 - in_chans (int): Number of input image channels. Default: 3 - embed_dim (int): Patch embedding dimension. Default: 96 - depths (tuple(int)): Depth of each Swin Transformer layer. - num_heads (tuple(int)): Number of attention heads in different layers. - window_size (int): Window size. Default: 7 - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 - qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None - drop_rate (float): Dropout rate. Default: 0 - attn_drop_rate (float): Attention dropout rate. Default: 0 - drop_path_rate (float): Stochastic depth rate. Default: 0.1 - norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. - ape (bool): If True, add absolute position embedding to the patch embedding. Default: False - patch_norm (bool): If True, add normalization after patch embedding. Default: True - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False - upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction - img_range: Image range. 1. 
or 255. - upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None - resi_connection: The convolutional block before residual connection. '1conv'/'3conv' - """ - - def __init__(self, img_size=64, patch_size=1, in_chans=3, - embed_dim=96, depths=(6, 6, 6, 6), num_heads=(6, 6, 6, 6), - window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None, - drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, - norm_layer=nn.LayerNorm, ape=False, patch_norm=True, - use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv', - **kwargs): - super(SwinIR, self).__init__() - num_in_ch = in_chans - num_out_ch = in_chans - num_feat = 64 - self.img_range = img_range - if in_chans == 3: - rgb_mean = (0.4488, 0.4371, 0.4040) - self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) - else: - self.mean = torch.zeros(1, 1, 1, 1) - self.upscale = upscale - self.upsampler = upsampler - self.window_size = window_size - - ##################################################################################################### - ################################### 1, shallow feature extraction ################################### - self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) - - ##################################################################################################### - ################################### 2, deep feature extraction ###################################### - self.num_layers = len(depths) - self.embed_dim = embed_dim - self.ape = ape - self.patch_norm = patch_norm - self.num_features = embed_dim - self.mlp_ratio = mlp_ratio - - # split image into non-overlapping patches - self.patch_embed = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, - norm_layer=norm_layer if self.patch_norm else None) - num_patches = self.patch_embed.num_patches - patches_resolution = self.patch_embed.patches_resolution - self.patches_resolution = patches_resolution - - # merge non-overlapping patches into image - self.patch_unembed = PatchUnEmbed( - img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, - norm_layer=norm_layer if self.patch_norm else None) - - # absolute position embedding - if self.ape: - self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) - trunc_normal_(self.absolute_pos_embed, std=.02) - - self.pos_drop = nn.Dropout(p=drop_rate) - - # stochastic depth - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule - - # build Residual Swin Transformer blocks (RSTB) - self.layers = nn.ModuleList() - for i_layer in range(self.num_layers): - layer = RSTB(dim=embed_dim, - input_resolution=(patches_resolution[0], - patches_resolution[1]), - depth=depths[i_layer], - num_heads=num_heads[i_layer], - window_size=window_size, - mlp_ratio=self.mlp_ratio, - qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, - drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results - norm_layer=norm_layer, - downsample=None, - use_checkpoint=use_checkpoint, - img_size=img_size, - patch_size=patch_size, - resi_connection=resi_connection - - ) - self.layers.append(layer) - self.norm = norm_layer(self.num_features) - - # build the last conv layer in deep feature extraction - if resi_connection == '1conv': - self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) - elif resi_connection == '3conv': - # to save parameters and memory - 
self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1)) - - ##################################################################################################### - ################################ 3, high quality image reconstruction ################################ - if self.upsampler == 'pixelshuffle': - # for classical SR - self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.upsample = Upsample(upscale, num_feat) - self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - elif self.upsampler == 'pixelshuffledirect': - # for lightweight SR (to save parameters) - self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, - (patches_resolution[0], patches_resolution[1])) - elif self.upsampler == 'nearest+conv': - # for real-world SR (less artifacts) - self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) - if self.upscale == 4: - self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) - self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) - self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) - else: - # for image denoising and JPEG compression artifact reduction - self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) - - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - @torch.jit.ignore - def no_weight_decay(self): - return {'absolute_pos_embed'} - - @torch.jit.ignore - def no_weight_decay_keywords(self): - return {'relative_position_bias_table'} - - def check_image_size(self, x): - _, _, h, w = x.size() - mod_pad_h = (self.window_size - h % self.window_size) % self.window_size - mod_pad_w = (self.window_size - w % self.window_size) % self.window_size - x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') - return x - - def forward_features(self, x): - x_size = (x.shape[2], x.shape[3]) - x = self.patch_embed(x) - if self.ape: - x = x + self.absolute_pos_embed - x = self.pos_drop(x) - - for layer in self.layers: - x = layer(x, x_size) - - x = self.norm(x) # B L C - x = self.patch_unembed(x, x_size) - - return x - - def forward(self, x): - H, W = x.shape[2:] - x = self.check_image_size(x) - - self.mean = self.mean.type_as(x) - x = (x - self.mean) * self.img_range - - if self.upsampler == 'pixelshuffle': - # for classical SR - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = self.conv_before_upsample(x) - x = self.conv_last(self.upsample(x)) - elif self.upsampler == 'pixelshuffledirect': - # for lightweight SR - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = self.upsample(x) - elif self.upsampler == 'nearest+conv': - # for real-world SR - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = self.conv_before_upsample(x) - x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) - if 
self.upscale == 4: - x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) - x = self.conv_last(self.lrelu(self.conv_hr(x))) - else: - # for image denoising and JPEG compression artifact reduction - x_first = self.conv_first(x) - res = self.conv_after_body(self.forward_features(x_first)) + x_first - x = x + self.conv_last(res) - - x = x / self.img_range + self.mean - - return x[:, :, :H*self.upscale, :W*self.upscale] - - def flops(self): - flops = 0 - H, W = self.patches_resolution - flops += H * W * 3 * self.embed_dim * 9 - flops += self.patch_embed.flops() - for layer in self.layers: - flops += layer.flops() - flops += H * W * 3 * self.embed_dim * self.embed_dim - flops += self.upsample.flops() - return flops - - -if __name__ == '__main__': - upscale = 4 - window_size = 8 - height = (1024 // upscale // window_size + 1) * window_size - width = (720 // upscale // window_size + 1) * window_size - model = SwinIR(upscale=2, img_size=(height, width), - window_size=window_size, img_range=1., depths=[6, 6, 6, 6], - embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect') - print(model) - print(height, width, model.flops() / 1e9) - - x = torch.randn((1, 3, height, width)) - x = model(x) - print(x.shape) diff --git a/extensions-builtin/SwinIR/swinir_model_arch_v2.py b/extensions-builtin/SwinIR/swinir_model_arch_v2.py deleted file mode 100644 index dad22cca..00000000 --- a/extensions-builtin/SwinIR/swinir_model_arch_v2.py +++ /dev/null @@ -1,1017 +0,0 @@ -# ----------------------------------------------------------------------------------- -# Swin2SR: Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, https://arxiv.org/abs/ -# Written by Conde and Choi et al. 
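Because window attention needs H and W to be multiples of the window size, the forward pass above first reflect-pads the input via check_image_size and finally crops the result back to H*upscale x W*upscale. A standalone version of the padding step (sizes are arbitrary):

    import torch
    import torch.nn.functional as F

    def pad_to_window(x, window_size):
        # reflect-pad H and W up to the next multiple of window_size, as in check_image_size()
        _, _, h, w = x.size()
        mod_pad_h = (window_size - h % window_size) % window_size
        mod_pad_w = (window_size - w % window_size) % window_size
        return F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')

    x = torch.randn(1, 3, 30, 45)
    print(pad_to_window(x, 8).shape)              # torch.Size([1, 3, 32, 48])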
-# ----------------------------------------------------------------------------------- - -import math -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as checkpoint -from timm.models.layers import DropPath, to_2tuple, trunc_normal_ - - -class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -def window_partition(x, window_size): - """ - Args: - x: (B, H, W, C) - window_size (int): window size - Returns: - windows: (num_windows*B, window_size, window_size, C) - """ - B, H, W, C = x.shape - x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) - windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) - return windows - - -def window_reverse(windows, window_size, H, W): - """ - Args: - windows: (num_windows*B, window_size, window_size, C) - window_size (int): Window size - H (int): Height of image - W (int): Width of image - Returns: - x: (B, H, W, C) - """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) - x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) - return x - -class WindowAttention(nn.Module): - r""" Window based multi-head self attention (W-MSA) module with relative position bias. - It supports both of shifted and non-shifted window. - Args: - dim (int): Number of input channels. - window_size (tuple[int]): The height and width of the window. - num_heads (int): Number of attention heads. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 - proj_drop (float, optional): Dropout ratio of output. Default: 0.0 - pretrained_window_size (tuple[int]): The height and width of the window in pre-training. 
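The second deleted file is the SwinV2 variant used by Swin2SR. Its WindowAttention, whose docstring appears above and whose constructor follows in the next hunk, replaces the learned relative-position-bias table with a continuous position bias: relative offsets are normalised, compressed onto a log-spaced scale, and mapped through a small MLP (cpb_mlp) to one bias per head. The sketch below rebuilds that table for an arbitrary 8x8 window; in the real forward pass the gathered bias additionally goes through 16*sigmoid(.) before being added to the attention logits:

    import numpy as np
    import torch
    import torch.nn as nn

    Wh, Ww, num_heads = 8, 8, 6                   # arbitrary window size and head count

    rel_h = torch.arange(-(Wh - 1), Wh, dtype=torch.float32)
    rel_w = torch.arange(-(Ww - 1), Ww, dtype=torch.float32)
    table = torch.stack(torch.meshgrid(rel_h, rel_w, indexing="ij")).permute(1, 2, 0).unsqueeze(0)
    table[..., 0] /= Wh - 1                       # offsets normalised to [-1, 1]
    table[..., 1] /= Ww - 1
    table *= 8                                    # stretched to [-8, 8] ...
    table = torch.sign(table) * torch.log2(table.abs() + 1.0) / np.log2(8)   # ... then log-spaced

    # a small MLP turns each (dh, dw) offset into one bias value per attention head
    cpb_mlp = nn.Sequential(nn.Linear(2, 512), nn.ReLU(inplace=True), nn.Linear(512, num_heads, bias=False))
    bias_table = cpb_mlp(table).view(-1, num_heads)
    print(bias_table.shape)                       # torch.Size([225, 6]); (2*Wh-1)*(2*Ww-1) offsets x heads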
- """ - - def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0., - pretrained_window_size=(0, 0)): - - super().__init__() - self.dim = dim - self.window_size = window_size # Wh, Ww - self.pretrained_window_size = pretrained_window_size - self.num_heads = num_heads - - self.logit_scale = nn.Parameter(torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True) - - # mlp to generate continuous relative position bias - self.cpb_mlp = nn.Sequential(nn.Linear(2, 512, bias=True), - nn.ReLU(inplace=True), - nn.Linear(512, num_heads, bias=False)) - - # get relative_coords_table - relative_coords_h = torch.arange(-(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32) - relative_coords_w = torch.arange(-(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32) - relative_coords_table = torch.stack( - torch.meshgrid([relative_coords_h, - relative_coords_w])).permute(1, 2, 0).contiguous().unsqueeze(0) # 1, 2*Wh-1, 2*Ww-1, 2 - if pretrained_window_size[0] > 0: - relative_coords_table[:, :, :, 0] /= (pretrained_window_size[0] - 1) - relative_coords_table[:, :, :, 1] /= (pretrained_window_size[1] - 1) - else: - relative_coords_table[:, :, :, 0] /= (self.window_size[0] - 1) - relative_coords_table[:, :, :, 1] /= (self.window_size[1] - 1) - relative_coords_table *= 8 # normalize to -8, 8 - relative_coords_table = torch.sign(relative_coords_table) * torch.log2( - torch.abs(relative_coords_table) + 1.0) / np.log2(8) - - self.register_buffer("relative_coords_table", relative_coords_table) - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(self.window_size[0]) - coords_w = torch.arange(self.window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += self.window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 - relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - self.register_buffer("relative_position_index", relative_position_index) - - self.qkv = nn.Linear(dim, dim * 3, bias=False) - if qkv_bias: - self.q_bias = nn.Parameter(torch.zeros(dim)) - self.v_bias = nn.Parameter(torch.zeros(dim)) - else: - self.q_bias = None - self.v_bias = None - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - self.softmax = nn.Softmax(dim=-1) - - def forward(self, x, mask=None): - """ - Args: - x: input features with shape of (num_windows*B, N, C) - mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None - """ - B_, N, C = x.shape - qkv_bias = None - if self.q_bias is not None: - qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) - qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) - qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) - - # cosine attention - attn = (F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1)) - logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. 
/ 0.01)).to(self.logit_scale.device)).exp() - attn = attn * logit_scale - - relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(-1, self.num_heads) - relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view( - self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - relative_position_bias = 16 * torch.sigmoid(relative_position_bias) - attn = attn + relative_position_bias.unsqueeze(0) - - if mask is not None: - nW = mask.shape[0] - attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) - attn = attn.view(-1, self.num_heads, N, N) - attn = self.softmax(attn) - else: - attn = self.softmax(attn) - - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B_, N, C) - x = self.proj(x) - x = self.proj_drop(x) - return x - - def extra_repr(self) -> str: - return f'dim={self.dim}, window_size={self.window_size}, ' \ - f'pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}' - - def flops(self, N): - # calculate flops for 1 window with token length of N - flops = 0 - # qkv = self.qkv(x) - flops += N * self.dim * 3 * self.dim - # attn = (q @ k.transpose(-2, -1)) - flops += self.num_heads * N * (self.dim // self.num_heads) * N - # x = (attn @ v) - flops += self.num_heads * N * N * (self.dim // self.num_heads) - # x = self.proj(x) - flops += N * self.dim * self.dim - return flops - -class SwinTransformerBlock(nn.Module): - r""" Swin Transformer Block. - Args: - dim (int): Number of input channels. - input_resolution (tuple[int]): Input resulotion. - num_heads (int): Number of attention heads. - window_size (int): Window size. - shift_size (int): Shift size for SW-MSA. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float, optional): Stochastic depth rate. Default: 0.0 - act_layer (nn.Module, optional): Activation layer. Default: nn.GELU - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - pretrained_window_size (int): Window size in pre-training. - """ - - def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, - mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., - act_layer=nn.GELU, norm_layer=nn.LayerNorm, pretrained_window_size=0): - super().__init__() - self.dim = dim - self.input_resolution = input_resolution - self.num_heads = num_heads - self.window_size = window_size - self.shift_size = shift_size - self.mlp_ratio = mlp_ratio - if min(self.input_resolution) <= self.window_size: - # if window size is larger than input resolution, we don't partition windows - self.shift_size = 0 - self.window_size = min(self.input_resolution) - assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" - - self.norm1 = norm_layer(dim) - self.attn = WindowAttention( - dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, - qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop, - pretrained_window_size=to_2tuple(pretrained_window_size)) - - self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - if self.shift_size > 0: - attn_mask = self.calculate_mask(self.input_resolution) - else: - attn_mask = None - - self.register_buffer("attn_mask", attn_mask) - - def calculate_mask(self, x_size): - # calculate attention mask for SW-MSA - H, W = x_size - img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 - h_slices = (slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None)) - w_slices = (slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None)) - cnt = 0 - for h in h_slices: - for w in w_slices: - img_mask[:, h, w, :] = cnt - cnt += 1 - - mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 - mask_windows = mask_windows.view(-1, self.window_size * self.window_size) - attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) - - return attn_mask - - def forward(self, x, x_size): - H, W = x_size - B, L, C = x.shape - #assert L == H * W, "input feature has wrong size" - - shortcut = x - x = x.view(B, H, W, C) - - # cyclic shift - if self.shift_size > 0: - shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) - else: - shifted_x = x - - # partition windows - x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C - x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C - - # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size - if self.input_resolution == x_size: - attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C - else: - attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device)) - - # merge windows - attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) - shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C - - # reverse cyclic shift - if self.shift_size > 0: - x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) - else: - x = shifted_x - x = x.view(B, H * W, C) - x = shortcut + self.drop_path(self.norm1(x)) - - # FFN - x = x + self.drop_path(self.norm2(self.mlp(x))) - - return x - - def extra_repr(self) -> str: - return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ - f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" - - def flops(self): - flops = 0 - H, W = self.input_resolution - # norm1 - flops += self.dim * H * W - # W-MSA/SW-MSA - nW = H * W / self.window_size / self.window_size - flops += nW * self.attn.flops(self.window_size * self.window_size) - # mlp - flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio - # norm2 - flops += self.dim * H * W - return flops - -class PatchMerging(nn.Module): - r""" Patch Merging Layer. - Args: - input_resolution (tuple[int]): Resolution of input feature. - dim (int): Number of input channels. - norm_layer (nn.Module, optional): Normalization layer. 
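Another SwinV2 change, visible in the attention forward above: queries and keys are L2-normalised so the logits become cosine similarities, and they are rescaled by a learned per-head temperature whose log is clamped so the scale never exceeds 1/0.01 = 100. A toy-shaped sketch of that computation:

    import math
    import torch
    import torch.nn.functional as F

    B_, nH, N, d = 4, 6, 64, 30                   # windows, heads, tokens per window, head dim (toy sizes)
    q, k = torch.randn(B_, nH, N, d), torch.randn(B_, nH, N, d)
    logit_scale = torch.log(10 * torch.ones(nH, 1, 1))        # a learnable parameter in the real module

    # cosine similarity instead of a scaled dot product, with a clamped learned temperature
    attn = F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1)
    scale = torch.clamp(logit_scale, max=math.log(1.0 / 0.01)).exp()   # capped at 100
    attn = attn * scale
    print(attn.shape)                             # torch.Size([4, 6, 64, 64])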
Default: nn.LayerNorm - """ - - def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): - super().__init__() - self.input_resolution = input_resolution - self.dim = dim - self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) - self.norm = norm_layer(2 * dim) - - def forward(self, x): - """ - x: B, H*W, C - """ - H, W = self.input_resolution - B, L, C = x.shape - assert L == H * W, "input feature has wrong size" - assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." - - x = x.view(B, H, W, C) - - x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C - x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C - x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C - x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C - x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C - x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C - - x = self.reduction(x) - x = self.norm(x) - - return x - - def extra_repr(self) -> str: - return f"input_resolution={self.input_resolution}, dim={self.dim}" - - def flops(self): - H, W = self.input_resolution - flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim - flops += H * W * self.dim // 2 - return flops - -class BasicLayer(nn.Module): - """ A basic Swin Transformer layer for one stage. - Args: - dim (int): Number of input channels. - input_resolution (tuple[int]): Input resolution. - depth (int): Number of blocks. - num_heads (int): Number of attention heads. - window_size (int): Local window size. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. - pretrained_window_size (int): Local window size in pre-training. 
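The block forward in the previous hunk also swaps the residual ordering: the v1 file normalises before attention and the MLP (pre-norm), while this v2 block applies norm1/norm2 after them, just before the residual addition (res-post-norm). With identity stand-ins for attention and the MLP, the two orderings compare as follows:

    import torch
    import torch.nn as nn

    dim = 60
    norm1, norm2 = nn.LayerNorm(dim), nn.LayerNorm(dim)
    attn, mlp = nn.Identity(), nn.Identity()      # stand-ins for window attention and the MLP

    x = torch.randn(1, 64, dim)

    # v1 (swinir_model_arch.py): pre-norm; normalise, transform, add
    y = x + attn(norm1(x))
    y = y + mlp(norm2(y))

    # v2 (this file): res-post-norm; transform, normalise, add
    z = x + norm1(attn(x))
    z = z + norm2(mlp(z))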
- """ - - def __init__(self, dim, input_resolution, depth, num_heads, window_size, - mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., - drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, - pretrained_window_size=0): - - super().__init__() - self.dim = dim - self.input_resolution = input_resolution - self.depth = depth - self.use_checkpoint = use_checkpoint - - # build blocks - self.blocks = nn.ModuleList([ - SwinTransformerBlock(dim=dim, input_resolution=input_resolution, - num_heads=num_heads, window_size=window_size, - shift_size=0 if (i % 2 == 0) else window_size // 2, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - drop=drop, attn_drop=attn_drop, - drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, - norm_layer=norm_layer, - pretrained_window_size=pretrained_window_size) - for i in range(depth)]) - - # patch merging layer - if downsample is not None: - self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) - else: - self.downsample = None - - def forward(self, x, x_size): - for blk in self.blocks: - if self.use_checkpoint: - x = checkpoint.checkpoint(blk, x, x_size) - else: - x = blk(x, x_size) - if self.downsample is not None: - x = self.downsample(x) - return x - - def extra_repr(self) -> str: - return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" - - def flops(self): - flops = 0 - for blk in self.blocks: - flops += blk.flops() - if self.downsample is not None: - flops += self.downsample.flops() - return flops - - def _init_respostnorm(self): - for blk in self.blocks: - nn.init.constant_(blk.norm1.bias, 0) - nn.init.constant_(blk.norm1.weight, 0) - nn.init.constant_(blk.norm2.bias, 0) - nn.init.constant_(blk.norm2.weight, 0) - -class PatchEmbed(nn.Module): - r""" Image to Patch Embedding - Args: - img_size (int): Image size. Default: 224. - patch_size (int): Patch token size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. - embed_dim (int): Number of linear projection output channels. Default: 96. - norm_layer (nn.Module, optional): Normalization layer. Default: None - """ - - def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] - self.img_size = img_size - self.patch_size = patch_size - self.patches_resolution = patches_resolution - self.num_patches = patches_resolution[0] * patches_resolution[1] - - self.in_chans = in_chans - self.embed_dim = embed_dim - - self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - if norm_layer is not None: - self.norm = norm_layer(embed_dim) - else: - self.norm = None - - def forward(self, x): - B, C, H, W = x.shape - # FIXME look at relaxing size constraints - # assert H == self.img_size[0] and W == self.img_size[1], - # f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." - x = self.proj(x).flatten(2).transpose(1, 2) # B Ph*Pw C - if self.norm is not None: - x = self.norm(x) - return x - - def flops(self): - Ho, Wo = self.patches_resolution - flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) - if self.norm is not None: - flops += Ho * Wo * self.embed_dim - return flops - -class RSTB(nn.Module): - """Residual Swin Transformer Block (RSTB). - - Args: - dim (int): Number of input channels. 
- input_resolution (tuple[int]): Input resolution. - depth (int): Number of blocks. - num_heads (int): Number of attention heads. - window_size (int): Local window size. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. - img_size: Input image size. - patch_size: Patch size. - resi_connection: The convolutional block before residual connection. - """ - - def __init__(self, dim, input_resolution, depth, num_heads, window_size, - mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., - drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, - img_size=224, patch_size=4, resi_connection='1conv'): - super(RSTB, self).__init__() - - self.dim = dim - self.input_resolution = input_resolution - - self.residual_group = BasicLayer(dim=dim, - input_resolution=input_resolution, - depth=depth, - num_heads=num_heads, - window_size=window_size, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - drop=drop, attn_drop=attn_drop, - drop_path=drop_path, - norm_layer=norm_layer, - downsample=downsample, - use_checkpoint=use_checkpoint) - - if resi_connection == '1conv': - self.conv = nn.Conv2d(dim, dim, 3, 1, 1) - elif resi_connection == '3conv': - # to save parameters and memory - self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(dim // 4, dim, 3, 1, 1)) - - self.patch_embed = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=dim, embed_dim=dim, - norm_layer=None) - - self.patch_unembed = PatchUnEmbed( - img_size=img_size, patch_size=patch_size, in_chans=dim, embed_dim=dim, - norm_layer=None) - - def forward(self, x, x_size): - return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x - - def flops(self): - flops = 0 - flops += self.residual_group.flops() - H, W = self.input_resolution - flops += H * W * self.dim * self.dim * 9 - flops += self.patch_embed.flops() - flops += self.patch_unembed.flops() - - return flops - -class PatchUnEmbed(nn.Module): - r""" Image to Patch Unembedding - - Args: - img_size (int): Image size. Default: 224. - patch_size (int): Patch token size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. - embed_dim (int): Number of linear projection output channels. Default: 96. - norm_layer (nn.Module, optional): Normalization layer. 
Default: None - """ - - def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] - self.img_size = img_size - self.patch_size = patch_size - self.patches_resolution = patches_resolution - self.num_patches = patches_resolution[0] * patches_resolution[1] - - self.in_chans = in_chans - self.embed_dim = embed_dim - - def forward(self, x, x_size): - B, HW, C = x.shape - x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C - return x - - def flops(self): - flops = 0 - return flops - - -class Upsample(nn.Sequential): - """Upsample module. - - Args: - scale (int): Scale factor. Supported scales: 2^n and 3. - num_feat (int): Channel number of intermediate features. - """ - - def __init__(self, scale, num_feat): - m = [] - if (scale & (scale - 1)) == 0: # scale = 2^n - for _ in range(int(math.log(scale, 2))): - m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) - m.append(nn.PixelShuffle(2)) - elif scale == 3: - m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) - m.append(nn.PixelShuffle(3)) - else: - raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.') - super(Upsample, self).__init__(*m) - -class Upsample_hf(nn.Sequential): - """Upsample module. - - Args: - scale (int): Scale factor. Supported scales: 2^n and 3. - num_feat (int): Channel number of intermediate features. - """ - - def __init__(self, scale, num_feat): - m = [] - if (scale & (scale - 1)) == 0: # scale = 2^n - for _ in range(int(math.log(scale, 2))): - m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) - m.append(nn.PixelShuffle(2)) - elif scale == 3: - m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) - m.append(nn.PixelShuffle(3)) - else: - raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.') - super(Upsample_hf, self).__init__(*m) - - -class UpsampleOneStep(nn.Sequential): - """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) - Used in lightweight SR to save parameters. - - Args: - scale (int): Scale factor. Supported scales: 2^n and 3. - num_feat (int): Channel number of intermediate features. - - """ - - def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): - self.num_feat = num_feat - self.input_resolution = input_resolution - m = [] - m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1)) - m.append(nn.PixelShuffle(scale)) - super(UpsampleOneStep, self).__init__(*m) - - def flops(self): - H, W = self.input_resolution - flops = H * W * self.num_feat * 3 * 9 - return flops - - - -class Swin2SR(nn.Module): - r""" Swin2SR - A PyTorch impl of : `Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration`. - - Args: - img_size (int | tuple(int)): Input image size. Default 64 - patch_size (int | tuple(int)): Patch size. Default: 1 - in_chans (int): Number of input image channels. Default: 3 - embed_dim (int): Patch embedding dimension. Default: 96 - depths (tuple(int)): Depth of each Swin Transformer layer. - num_heads (tuple(int)): Number of attention heads in different layers. - window_size (int): Window size. Default: 7 - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 - qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True - drop_rate (float): Dropout rate. 
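Upsample and Upsample_hf above build the reconstruction tail from conv + PixelShuffle stages: one 3x3 conv to 4*num_feat channels plus PixelShuffle(2) per factor of two, or a single 9*num_feat stage for x3, while UpsampleOneStep projects straight to scale^2 * out_channels and pixel-shuffles once for the lightweight head. A standalone sketch of the power-of-two path (feature counts are arbitrary):

    import math
    import torch
    import torch.nn as nn

    def make_upsample(scale, num_feat):
        # conv + PixelShuffle stages, mirroring the deleted Upsample module (scales 2^n and 3)
        m = []
        if (scale & (scale - 1)) == 0:                        # power of two: chain x2 stages
            for _ in range(int(math.log2(scale))):
                m += [nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1), nn.PixelShuffle(2)]
        elif scale == 3:
            m += [nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1), nn.PixelShuffle(3)]
        else:
            raise ValueError(f"scale {scale} is not supported")
        return nn.Sequential(*m)

    x = torch.randn(1, 64, 24, 24)
    print(make_upsample(4, 64)(x).shape)                      # torch.Size([1, 64, 96, 96])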
Default: 0 - attn_drop_rate (float): Attention dropout rate. Default: 0 - drop_path_rate (float): Stochastic depth rate. Default: 0.1 - norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. - ape (bool): If True, add absolute position embedding to the patch embedding. Default: False - patch_norm (bool): If True, add normalization after patch embedding. Default: True - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False - upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction - img_range: Image range. 1. or 255. - upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None - resi_connection: The convolutional block before residual connection. '1conv'/'3conv' - """ - - def __init__(self, img_size=64, patch_size=1, in_chans=3, - embed_dim=96, depths=(6, 6, 6, 6), num_heads=(6, 6, 6, 6), - window_size=7, mlp_ratio=4., qkv_bias=True, - drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, - norm_layer=nn.LayerNorm, ape=False, patch_norm=True, - use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv', - **kwargs): - super(Swin2SR, self).__init__() - num_in_ch = in_chans - num_out_ch = in_chans - num_feat = 64 - self.img_range = img_range - if in_chans == 3: - rgb_mean = (0.4488, 0.4371, 0.4040) - self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) - else: - self.mean = torch.zeros(1, 1, 1, 1) - self.upscale = upscale - self.upsampler = upsampler - self.window_size = window_size - - ##################################################################################################### - ################################### 1, shallow feature extraction ################################### - self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) - - ##################################################################################################### - ################################### 2, deep feature extraction ###################################### - self.num_layers = len(depths) - self.embed_dim = embed_dim - self.ape = ape - self.patch_norm = patch_norm - self.num_features = embed_dim - self.mlp_ratio = mlp_ratio - - # split image into non-overlapping patches - self.patch_embed = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, - norm_layer=norm_layer if self.patch_norm else None) - num_patches = self.patch_embed.num_patches - patches_resolution = self.patch_embed.patches_resolution - self.patches_resolution = patches_resolution - - # merge non-overlapping patches into image - self.patch_unembed = PatchUnEmbed( - img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim, - norm_layer=norm_layer if self.patch_norm else None) - - # absolute position embedding - if self.ape: - self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) - trunc_normal_(self.absolute_pos_embed, std=.02) - - self.pos_drop = nn.Dropout(p=drop_rate) - - # stochastic depth - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule - - # build Residual Swin Transformer blocks (RSTB) - self.layers = nn.ModuleList() - for i_layer in range(self.num_layers): - layer = RSTB(dim=embed_dim, - input_resolution=(patches_resolution[0], - patches_resolution[1]), - depth=depths[i_layer], - num_heads=num_heads[i_layer], - window_size=window_size, - mlp_ratio=self.mlp_ratio, - qkv_bias=qkv_bias, - drop=drop_rate, attn_drop=attn_drop_rate, 
- drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results - norm_layer=norm_layer, - downsample=None, - use_checkpoint=use_checkpoint, - img_size=img_size, - patch_size=patch_size, - resi_connection=resi_connection - - ) - self.layers.append(layer) - - if self.upsampler == 'pixelshuffle_hf': - self.layers_hf = nn.ModuleList() - for i_layer in range(self.num_layers): - layer = RSTB(dim=embed_dim, - input_resolution=(patches_resolution[0], - patches_resolution[1]), - depth=depths[i_layer], - num_heads=num_heads[i_layer], - window_size=window_size, - mlp_ratio=self.mlp_ratio, - qkv_bias=qkv_bias, - drop=drop_rate, attn_drop=attn_drop_rate, - drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results - norm_layer=norm_layer, - downsample=None, - use_checkpoint=use_checkpoint, - img_size=img_size, - patch_size=patch_size, - resi_connection=resi_connection - - ) - self.layers_hf.append(layer) - - self.norm = norm_layer(self.num_features) - - # build the last conv layer in deep feature extraction - if resi_connection == '1conv': - self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) - elif resi_connection == '3conv': - # to save parameters and memory - self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), - nn.LeakyReLU(negative_slope=0.2, inplace=True), - nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1)) - - ##################################################################################################### - ################################ 3, high quality image reconstruction ################################ - if self.upsampler == 'pixelshuffle': - # for classical SR - self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.upsample = Upsample(upscale, num_feat) - self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - elif self.upsampler == 'pixelshuffle_aux': - self.conv_bicubic = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1) - self.conv_before_upsample = nn.Sequential( - nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.conv_aux = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - self.conv_after_aux = nn.Sequential( - nn.Conv2d(3, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.upsample = Upsample(upscale, num_feat) - self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - - elif self.upsampler == 'pixelshuffle_hf': - self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.upsample = Upsample(upscale, num_feat) - self.upsample_hf = Upsample_hf(upscale, num_feat) - self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - self.conv_first_hf = nn.Sequential(nn.Conv2d(num_feat, embed_dim, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.conv_after_body_hf = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) - self.conv_before_upsample_hf = nn.Sequential( - nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.conv_last_hf = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - - elif self.upsampler == 'pixelshuffledirect': - # for lightweight SR (to save parameters) - self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, - (patches_resolution[0], patches_resolution[1])) - elif self.upsampler == 'nearest+conv': - # for real-world SR (less artifacts) - assert self.upscale == 4, 'only support x4 now.' 
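The 'pixelshuffle_aux' head constructed above adds the learned upsampling on top of a bicubically interpolated copy of the input and also returns a low-resolution auxiliary RGB image, while 'pixelshuffle_hf' runs a second RSTB stack on a high-frequency branch. A shape-level sketch of the bicubic skip connection used by the aux head (sizes are arbitrary):

    import torch
    import torch.nn.functional as F

    upscale = 4                                   # arbitrary example factor
    lr = torch.randn(1, 3, 24, 24)
    H, W = lr.shape[2:]

    bicubic = F.interpolate(lr, size=(H * upscale, W * upscale), mode='bicubic', align_corners=False)
    print(bicubic.shape)                          # torch.Size([1, 3, 96, 96]); the learned branch is added to this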
- self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1), - nn.LeakyReLU(inplace=True)) - self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) - self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) - self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) - self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) - self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) - else: - # for image denoising and JPEG compression artifact reduction - self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) - - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - @torch.jit.ignore - def no_weight_decay(self): - return {'absolute_pos_embed'} - - @torch.jit.ignore - def no_weight_decay_keywords(self): - return {'relative_position_bias_table'} - - def check_image_size(self, x): - _, _, h, w = x.size() - mod_pad_h = (self.window_size - h % self.window_size) % self.window_size - mod_pad_w = (self.window_size - w % self.window_size) % self.window_size - x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') - return x - - def forward_features(self, x): - x_size = (x.shape[2], x.shape[3]) - x = self.patch_embed(x) - if self.ape: - x = x + self.absolute_pos_embed - x = self.pos_drop(x) - - for layer in self.layers: - x = layer(x, x_size) - - x = self.norm(x) # B L C - x = self.patch_unembed(x, x_size) - - return x - - def forward_features_hf(self, x): - x_size = (x.shape[2], x.shape[3]) - x = self.patch_embed(x) - if self.ape: - x = x + self.absolute_pos_embed - x = self.pos_drop(x) - - for layer in self.layers_hf: - x = layer(x, x_size) - - x = self.norm(x) # B L C - x = self.patch_unembed(x, x_size) - - return x - - def forward(self, x): - H, W = x.shape[2:] - x = self.check_image_size(x) - - self.mean = self.mean.type_as(x) - x = (x - self.mean) * self.img_range - - if self.upsampler == 'pixelshuffle': - # for classical SR - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = self.conv_before_upsample(x) - x = self.conv_last(self.upsample(x)) - elif self.upsampler == 'pixelshuffle_aux': - bicubic = F.interpolate(x, size=(H * self.upscale, W * self.upscale), mode='bicubic', align_corners=False) - bicubic = self.conv_bicubic(bicubic) - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = self.conv_before_upsample(x) - aux = self.conv_aux(x) # b, 3, LR_H, LR_W - x = self.conv_after_aux(aux) - x = self.upsample(x)[:, :, :H * self.upscale, :W * self.upscale] + bicubic[:, :, :H * self.upscale, :W * self.upscale] - x = self.conv_last(x) - aux = aux / self.img_range + self.mean - elif self.upsampler == 'pixelshuffle_hf': - # for classical SR with HF - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x_before = self.conv_before_upsample(x) - x_out = self.conv_last(self.upsample(x_before)) - - x_hf = self.conv_first_hf(x_before) - x_hf = self.conv_after_body_hf(self.forward_features_hf(x_hf)) + x_hf - x_hf = self.conv_before_upsample_hf(x_hf) - x_hf = self.conv_last_hf(self.upsample_hf(x_hf)) - x = x_out + x_hf - x_hf = x_hf / self.img_range + self.mean - - elif self.upsampler == 'pixelshuffledirect': - # for lightweight SR - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = 
self.upsample(x) - elif self.upsampler == 'nearest+conv': - # for real-world SR - x = self.conv_first(x) - x = self.conv_after_body(self.forward_features(x)) + x - x = self.conv_before_upsample(x) - x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) - x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) - x = self.conv_last(self.lrelu(self.conv_hr(x))) - else: - # for image denoising and JPEG compression artifact reduction - x_first = self.conv_first(x) - res = self.conv_after_body(self.forward_features(x_first)) + x_first - x = x + self.conv_last(res) - - x = x / self.img_range + self.mean - if self.upsampler == "pixelshuffle_aux": - return x[:, :, :H*self.upscale, :W*self.upscale], aux - - elif self.upsampler == "pixelshuffle_hf": - x_out = x_out / self.img_range + self.mean - return x_out[:, :, :H*self.upscale, :W*self.upscale], x[:, :, :H*self.upscale, :W*self.upscale], x_hf[:, :, :H*self.upscale, :W*self.upscale] - - else: - return x[:, :, :H*self.upscale, :W*self.upscale] - - def flops(self): - flops = 0 - H, W = self.patches_resolution - flops += H * W * 3 * self.embed_dim * 9 - flops += self.patch_embed.flops() - for layer in self.layers: - flops += layer.flops() - flops += H * W * 3 * self.embed_dim * self.embed_dim - flops += self.upsample.flops() - return flops - - -if __name__ == '__main__': - upscale = 4 - window_size = 8 - height = (1024 // upscale // window_size + 1) * window_size - width = (720 // upscale // window_size + 1) * window_size - model = Swin2SR(upscale=2, img_size=(height, width), - window_size=window_size, img_range=1., depths=[6, 6, 6, 6], - embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect') - print(model) - print(height, width, model.flops() / 1e9) - - x = torch.randn((1, 3, height, width)) - x = model(x) - print(x.shape) -- cgit v1.2.3 From 4ad0c0c0a805da4bac03cff86ea17c25a1291546 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Sat, 30 Dec 2023 16:37:03 +0200 Subject: Verify architecture for loaded Spandrel models --- extensions-builtin/ScuNET/scripts/scunet_model.py | 2 +- extensions-builtin/SwinIR/scripts/swinir_model.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py index 18cf8e1a..5f3dd08b 100644 --- a/extensions-builtin/ScuNET/scripts/scunet_model.py +++ b/extensions-builtin/ScuNET/scripts/scunet_model.py @@ -121,7 +121,7 @@ class UpscalerScuNET(modules.upscaler.Upscaler): filename = modelloader.load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name=f"{self.name}.pth") else: filename = path - return modelloader.load_spandrel_model(filename, device=device) + return modelloader.load_spandrel_model(filename, device=device, expected_architecture='SCUNet') def on_ui_settings(): diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py index 85c18b9e..aae159af 100644 --- a/extensions-builtin/SwinIR/scripts/swinir_model.py +++ b/extensions-builtin/SwinIR/scripts/swinir_model.py @@ -75,6 +75,7 @@ class UpscalerSwinIR(Upscaler): filename, device=self._get_device(), dtype=devices.dtype, + expected_architecture="SwinIR", ) if getattr(opts, 'SWIN_torch_compile', False): try: -- cgit v1.2.3 From bc5ae74c7d8949bab37e260b16e76889b9968099 Mon Sep 17 00:00:00 2001 From: Learwin 
<6223515+Learwin@users.noreply.github.com> Date: Sat, 30 Dec 2023 21:52:27 +0100 Subject: Added negative prompts to extra networks lora --- extensions-builtin/Lora/ui_edit_user_metadata.py | 14 ++++++++++++-- extensions-builtin/Lora/ui_extra_networks_lora.py | 9 +++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_edit_user_metadata.py b/extensions-builtin/Lora/ui_edit_user_metadata.py index c7011909..f7859b21 100644 --- a/extensions-builtin/Lora/ui_edit_user_metadata.py +++ b/extensions-builtin/Lora/ui_edit_user_metadata.py @@ -54,12 +54,14 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.slider_preferred_weight = None self.edit_notes = None - def save_lora_user_metadata(self, name, desc, sd_version, activation_text, preferred_weight, notes): + def save_lora_user_metadata(self, name, desc, sd_version, activation_text, preferred_weight, negative_text, negative_weight, notes): user_metadata = self.get_user_metadata(name) user_metadata["description"] = desc user_metadata["sd version"] = sd_version user_metadata["activation text"] = activation_text user_metadata["preferred weight"] = preferred_weight + user_metadata["negative text"] = negative_text + user_metadata["negative weight"] = negative_weight user_metadata["notes"] = notes self.write_user_metadata(name, user_metadata) @@ -127,6 +129,8 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) gr.HighlightedText.update(value=gradio_tags, visible=True if tags else False), user_metadata.get('activation text', ''), float(user_metadata.get('preferred weight', 0.0)), + user_metadata.get('negative text', ''), + float(user_metadata.get('negative weight', 0.0)), gr.update(visible=True if tags else False), gr.update(value=self.generate_random_prompt_from_tags(tags), visible=True if tags else False), ] @@ -162,7 +166,8 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.taginfo = gr.HighlightedText(label="Training dataset tags") self.edit_activation_text = gr.Text(label='Activation text', info="Will be added to prompt along with Lora") self.slider_preferred_weight = gr.Slider(label='Preferred weight', info="Set to 0 to disable", minimum=0.0, maximum=2.0, step=0.01) - + self.edit_negative_text = gr.Text(label='Negative prompt', info="Will be added to negative prompts") + self.slider_negative_weight = gr.Slider(label='Preferred negative weight', info="Set to 0 to disable", minimum=0.0, maximum=2.0, step=0.01) with gr.Row() as row_random_prompt: with gr.Column(scale=8): random_prompt = gr.Textbox(label='Random prompt', lines=4, max_lines=4, interactive=False) @@ -198,6 +203,8 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.taginfo, self.edit_activation_text, self.slider_preferred_weight, + self.edit_negative_text, + self.slider_negative_weight, row_random_prompt, random_prompt, ] @@ -211,7 +218,10 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.select_sd_version, self.edit_activation_text, self.slider_preferred_weight, + self.edit_negative_text, + self.slider_negative_weight, self.edit_notes, ] + self.setup_save_handler(self.button_save, self.save_lora_user_metadata, edited_components) diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index df02c663..09ce2a05 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ 
b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -45,6 +45,15 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): if activation_text: item["prompt"] += " + " + quote_js(" " + activation_text) + negative_prompt = item["user_metadata"].get("negative text") + preferred_negative_weight = item["user_metadata"].get("negative weight") + item["negative_prompt"] = quote_js("") + if negative_prompt: + neg_prompt = negative_prompt + if (preferred_negative_weight > 0): + neg_prompt = '(' + negative_prompt + ':' + str(preferred_negative_weight) + ')' + item["negative_prompt"] = quote_js(neg_prompt) + sd_version = item["user_metadata"].get("sd version") if sd_version in network.SdVersion.__members__: item["sd_version"] = sd_version -- cgit v1.2.3 From a2f23f9d22dde87bf2529dcb2854a6a5d3d44278 Mon Sep 17 00:00:00 2001 From: Learwin <6223515+Learwin@users.noreply.github.com> Date: Sat, 30 Dec 2023 22:16:51 +0100 Subject: Code Style fixes --- extensions-builtin/Lora/ui_extra_networks_lora.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index 09ce2a05..9a6624e3 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -52,8 +52,8 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): neg_prompt = negative_prompt if (preferred_negative_weight > 0): neg_prompt = '(' + negative_prompt + ':' + str(preferred_negative_weight) + ')' - item["negative_prompt"] = quote_js(neg_prompt) - + item["negative_prompt"] = quote_js(neg_prompt) + sd_version = item["user_metadata"].get("sd version") if sd_version in network.SdVersion.__members__: item["sd_version"] = sd_version -- cgit v1.2.3 From 777af661a21821994993df3ef566b01df2bb61a0 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Sun, 31 Dec 2023 00:09:51 +0200 Subject: Be more clear about Spandrel model nomenclature --- extensions-builtin/SwinIR/scripts/swinir_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py index aae159af..95c7ec64 100644 --- a/extensions-builtin/SwinIR/scripts/swinir_model.py +++ b/extensions-builtin/SwinIR/scripts/swinir_model.py @@ -71,7 +71,7 @@ class UpscalerSwinIR(Upscaler): else: filename = path - model = modelloader.load_spandrel_model( + model_descriptor = modelloader.load_spandrel_model( filename, device=self._get_device(), dtype=devices.dtype, @@ -79,10 +79,10 @@ class UpscalerSwinIR(Upscaler): ) if getattr(opts, 'SWIN_torch_compile', False): try: - model = torch.compile(model) + model_descriptor.model.compile() except Exception: logger.warning("Failed to compile SwinIR model, fallback to JIT", exc_info=True) - return model + return model_descriptor def _get_device(self): return devices.get_device_for('swinir') -- cgit v1.2.3 From 6f86b62a1be7993073ba3a789d522e0b8870605a Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Sat, 30 Dec 2023 22:53:49 +0200 Subject: Deduplicate tiled inference code from SwinIR/ScuNET --- extensions-builtin/ScuNET/scripts/scunet_model.py | 55 +++++----------------- extensions-builtin/SwinIR/scripts/swinir_model.py | 57 ++--------------------- 2 files changed, 16 insertions(+), 96 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py 
b/extensions-builtin/ScuNET/scripts/scunet_model.py index 5f3dd08b..f799cb76 100644 --- a/extensions-builtin/ScuNET/scripts/scunet_model.py +++ b/extensions-builtin/ScuNET/scripts/scunet_model.py @@ -3,12 +3,11 @@ import sys import PIL.Image import numpy as np import torch -from tqdm import tqdm import modules.upscaler from modules import devices, modelloader, script_callbacks, errors - from modules.shared import opts +from modules.upscaler_utils import tiled_upscale_2 class UpscalerScuNET(modules.upscaler.Upscaler): @@ -40,47 +39,6 @@ class UpscalerScuNET(modules.upscaler.Upscaler): scalers.append(scaler_data2) self.scalers = scalers - @staticmethod - @torch.no_grad() - def tiled_inference(img, model): - # test the image tile by tile - h, w = img.shape[2:] - tile = opts.SCUNET_tile - tile_overlap = opts.SCUNET_tile_overlap - if tile == 0: - return model(img) - - device = devices.get_device_for('scunet') - assert tile % 8 == 0, "tile size should be a multiple of window_size" - sf = 1 - - stride = tile - tile_overlap - h_idx_list = list(range(0, h - tile, stride)) + [h - tile] - w_idx_list = list(range(0, w - tile, stride)) + [w - tile] - E = torch.zeros(1, 3, h * sf, w * sf, dtype=img.dtype, device=device) - W = torch.zeros_like(E, dtype=devices.dtype, device=device) - - with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="ScuNET tiles") as pbar: - for h_idx in h_idx_list: - - for w_idx in w_idx_list: - - in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile] - - out_patch = model(in_patch) - out_patch_mask = torch.ones_like(out_patch) - - E[ - ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf - ].add_(out_patch) - W[ - ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf - ].add_(out_patch_mask) - pbar.update(1) - output = E.div_(W) - - return output - def do_upscale(self, img: PIL.Image.Image, selected_file): devices.torch_gc() @@ -104,7 +62,16 @@ class UpscalerScuNET(modules.upscaler.Upscaler): _img[:, :, :h, :w] = torch_img # pad image torch_img = _img - torch_output = self.tiled_inference(torch_img, model).squeeze(0) + with torch.no_grad(): + torch_output = tiled_upscale_2( + torch_img, + model, + tile_size=opts.SCUNET_tile, + tile_overlap=opts.SCUNET_tile_overlap, + scale=1, + device=devices.get_device_for('scunet'), + desc="ScuNET tiles", + ).squeeze(0) torch_output = torch_output[:, :h * 1, :w * 1] # remove padding, if any np_output: np.ndarray = torch_output.float().cpu().clamp_(0, 1).numpy() del torch_img, torch_output diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py index 95c7ec64..8a555c79 100644 --- a/extensions-builtin/SwinIR/scripts/swinir_model.py +++ b/extensions-builtin/SwinIR/scripts/swinir_model.py @@ -4,11 +4,11 @@ import sys import numpy as np import torch from PIL import Image -from tqdm import tqdm from modules import modelloader, devices, script_callbacks, shared -from modules.shared import opts, state +from modules.shared import opts from modules.upscaler import Upscaler, UpscalerData +from modules.upscaler_utils import tiled_upscale_2 SWINIR_MODEL_URL = "https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth" @@ -110,14 +110,14 @@ def upscale( w_pad = (w_old // window_size + 1) * window_size - w_old img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, : h_old + h_pad, :] img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, : w_old + w_pad] - output = inference( + output = tiled_upscale_2( 
img, model, - tile=tile, + tile_size=tile, tile_overlap=tile_overlap, - window_size=window_size, scale=scale, device=device, + desc="SwinIR tiles", ) output = output[..., : h_old * scale, : w_old * scale] output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy() @@ -129,53 +129,6 @@ def upscale( return Image.fromarray(output, "RGB") -def inference( - img, - model, - *, - tile: int, - tile_overlap: int, - window_size: int, - scale: int, - device, -): - # test the image tile by tile - b, c, h, w = img.size() - tile = min(tile, h, w) - assert tile % window_size == 0, "tile size should be a multiple of window_size" - sf = scale - - stride = tile - tile_overlap - h_idx_list = list(range(0, h - tile, stride)) + [h - tile] - w_idx_list = list(range(0, w - tile, stride)) + [w - tile] - E = torch.zeros(b, c, h * sf, w * sf, dtype=devices.dtype, device=device).type_as(img) - W = torch.zeros_like(E, dtype=devices.dtype, device=device) - - with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="SwinIR tiles") as pbar: - for h_idx in h_idx_list: - if state.interrupted or state.skipped: - break - - for w_idx in w_idx_list: - if state.interrupted or state.skipped: - break - - in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile] - out_patch = model(in_patch) - out_patch_mask = torch.ones_like(out_patch) - - E[ - ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf - ].add_(out_patch) - W[ - ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf - ].add_(out_patch_mask) - pbar.update(1) - output = E.div_(W) - - return output - - def on_ui_settings(): import gradio as gr -- cgit v1.2.3 From d4945f4422e5a0bf31a6dbe4c1aeedd78c09eacb Mon Sep 17 00:00:00 2001 From: Learwin <6223515+Learwin@users.noreply.github.com> Date: Sun, 31 Dec 2023 13:22:30 +0100 Subject: Removed weight slider for negative prompts --- extensions-builtin/Lora/ui_edit_user_metadata.py | 7 +------ extensions-builtin/Lora/ui_extra_networks_lora.py | 6 +----- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/Lora/ui_edit_user_metadata.py b/extensions-builtin/Lora/ui_edit_user_metadata.py index f7859b21..3160aecf 100644 --- a/extensions-builtin/Lora/ui_edit_user_metadata.py +++ b/extensions-builtin/Lora/ui_edit_user_metadata.py @@ -54,14 +54,13 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.slider_preferred_weight = None self.edit_notes = None - def save_lora_user_metadata(self, name, desc, sd_version, activation_text, preferred_weight, negative_text, negative_weight, notes): + def save_lora_user_metadata(self, name, desc, sd_version, activation_text, preferred_weight, negative_text, notes): user_metadata = self.get_user_metadata(name) user_metadata["description"] = desc user_metadata["sd version"] = sd_version user_metadata["activation text"] = activation_text user_metadata["preferred weight"] = preferred_weight user_metadata["negative text"] = negative_text - user_metadata["negative weight"] = negative_weight user_metadata["notes"] = notes self.write_user_metadata(name, user_metadata) @@ -130,7 +129,6 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) user_metadata.get('activation text', ''), float(user_metadata.get('preferred weight', 0.0)), user_metadata.get('negative text', ''), - float(user_metadata.get('negative weight', 0.0)), gr.update(visible=True if tags else False), gr.update(value=self.generate_random_prompt_from_tags(tags), visible=True if 
tags else False), ] @@ -167,7 +165,6 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.edit_activation_text = gr.Text(label='Activation text', info="Will be added to prompt along with Lora") self.slider_preferred_weight = gr.Slider(label='Preferred weight', info="Set to 0 to disable", minimum=0.0, maximum=2.0, step=0.01) self.edit_negative_text = gr.Text(label='Negative prompt', info="Will be added to negative prompts") - self.slider_negative_weight = gr.Slider(label='Preferred negative weight', info="Set to 0 to disable", minimum=0.0, maximum=2.0, step=0.01) with gr.Row() as row_random_prompt: with gr.Column(scale=8): random_prompt = gr.Textbox(label='Random prompt', lines=4, max_lines=4, interactive=False) @@ -204,7 +201,6 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.edit_activation_text, self.slider_preferred_weight, self.edit_negative_text, - self.slider_negative_weight, row_random_prompt, random_prompt, ] @@ -219,7 +215,6 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor) self.edit_activation_text, self.slider_preferred_weight, self.edit_negative_text, - self.slider_negative_weight, self.edit_notes, ] diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index 9a6624e3..e714fac4 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -46,13 +46,9 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): item["prompt"] += " + " + quote_js(" " + activation_text) negative_prompt = item["user_metadata"].get("negative text") - preferred_negative_weight = item["user_metadata"].get("negative weight") item["negative_prompt"] = quote_js("") if negative_prompt: - neg_prompt = negative_prompt - if (preferred_negative_weight > 0): - neg_prompt = '(' + negative_prompt + ':' + str(preferred_negative_weight) + ')' - item["negative_prompt"] = quote_js(neg_prompt) + item["negative_prompt"] = quote_js('(' + negative_prompt + ':1)') sd_version = item["user_metadata"].get("sd version") if sd_version in network.SdVersion.__members__: -- cgit v1.2.3 From d859cec696a953dbfd6f69f7735e68661748d579 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 1 Jan 2024 13:53:12 +0300 Subject: infotext.py: rename usages in the codebase --- .../extra-options-section/scripts/extra_options_section.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'extensions-builtin') diff --git a/extensions-builtin/extra-options-section/scripts/extra_options_section.py b/extensions-builtin/extra-options-section/scripts/extra_options_section.py index ac2c3de4..8aa901fd 100644 --- a/extensions-builtin/extra-options-section/scripts/extra_options_section.py +++ b/extensions-builtin/extra-options-section/scripts/extra_options_section.py @@ -1,7 +1,7 @@ import math import gradio as gr -from modules import scripts, shared, ui_components, ui_settings, generation_parameters_copypaste +from modules import scripts, shared, ui_components, ui_settings, infotext from modules.ui_components import FormColumn @@ -25,7 +25,7 @@ class ExtraOptionsSection(scripts.Script): extra_options = shared.opts.extra_options_img2img if is_img2img else shared.opts.extra_options_txt2img elem_id_tabname = "extra_options_" + ("img2img" if is_img2img else "txt2img") - mapping = {k: v for v, k in generation_parameters_copypaste.infotext_to_setting_name_mapping} + mapping = {k: v 
for v, k in infotext.infotext_to_setting_name_mapping} with gr.Blocks() as interface: with gr.Accordion("Options", open=False, elem_id=elem_id_tabname) if shared.opts.extra_options_accordion and extra_options else gr.Group(elem_id=elem_id_tabname): -- cgit v1.2.3
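
Editor's note on the Spandrel verification commits above: the two expected_architecture call sites (ScuNET and SwinIR) only show the consumer side of the check; the helper itself lives in modules/modelloader.py, which is filtered out of this extensions-builtin log. Below is a minimal, hypothetical sketch of what such a guard could look like. It assumes spandrel's ModelLoader.load_from_file() API, that the returned ModelDescriptor exposes the detected architecture name as a plain string attribute, and a warn-only policy -- none of these details are taken from the actual patch.

    # Hypothetical loader-side sketch (not part of this patch series).
    # The `architecture` attribute name and the warn-only policy are assumptions.
    import logging

    import spandrel

    logger = logging.getLogger(__name__)


    def load_spandrel_model(path, *, device, dtype=None, expected_architecture=None):
        # spandrel detects the network type from the checkpoint's state dict
        model_descriptor = spandrel.ModelLoader(device=device).load_from_file(path)

        if expected_architecture and model_descriptor.architecture != expected_architecture:
            logger.warning(
                "Model %r is not a %r model (detected %r)",
                path, expected_architecture, model_descriptor.architecture,
            )

        model_descriptor.model.eval()
        if dtype is not None:
            model_descriptor.model.to(dtype=dtype)
        return model_descriptor

Returning the whole descriptor rather than the bare nn.Module is consistent with the later "Be more clear about Spandrel model nomenclature" commit, which accesses model_descriptor.model explicitly.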
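
Editor's note on the "Deduplicate tiled inference code from SwinIR/ScuNET" commit: both per-upscaler tiling loops are replaced by a shared modules.upscaler_utils.tiled_upscale_2, whose implementation is not visible here because changes under modules/ are filtered out of this log. The sketch below reconstructs a plausible version of that helper: the keyword-only signature (tile_size, tile_overlap, scale, device, desc) is taken from the new call sites, and the stitching logic from the deleted SwinIR inference() and ScuNET tiled_inference() loops. Interrupt handling, the tile_size <= 0 early-out, and dtype choices are assumptions carried over from the deleted code, not facts about the real helper.

    # Plausible reconstruction of modules/upscaler_utils.tiled_upscale_2 -- the real
    # implementation is outside the scope of this log. Signature from the new call
    # sites; body adapted from the deleted SwinIR/ScuNET tiling loops.
    import torch
    from tqdm import tqdm

    from modules import shared


    def tiled_upscale_2(img, model, *, tile_size, tile_overlap, scale, device, desc="Tiled upscale"):
        b, c, h, w = img.size()

        if tile_size <= 0:
            # Mirrors the old ScuNET behaviour: tiling disabled, run the whole image at once.
            return model(img)

        tile_size = min(tile_size, h, w)
        stride = tile_size - tile_overlap
        h_idx_list = list(range(0, h - tile_size, stride)) + [h - tile_size]
        w_idx_list = list(range(0, w - tile_size, stride)) + [w - tile_size]

        # Accumulators for the stitched output and for the per-pixel overlap weights.
        result = torch.zeros(b, c, h * scale, w * scale, dtype=img.dtype, device=device)
        weights = torch.zeros_like(result)

        with tqdm(total=len(h_idx_list) * len(w_idx_list), desc=desc) as pbar:
            for h_idx in h_idx_list:
                if shared.state.interrupted or shared.state.skipped:
                    break
                for w_idx in w_idx_list:
                    if shared.state.interrupted or shared.state.skipped:
                        break
                    in_patch = img[..., h_idx:h_idx + tile_size, w_idx:w_idx + tile_size]
                    out_patch = model(in_patch)

                    # Add the upscaled patch into place and count how many tiles touched each pixel.
                    result[..., h_idx * scale:(h_idx + tile_size) * scale,
                           w_idx * scale:(w_idx + tile_size) * scale].add_(out_patch)
                    weights[..., h_idx * scale:(h_idx + tile_size) * scale,
                            w_idx * scale:(w_idx + tile_size) * scale].add_(torch.ones_like(out_patch))
                    pbar.update(1)

        # Average overlapping regions.
        return result.div_(weights)

Under those assumptions, either call site shown in the patches would use it unchanged, e.g. tiled_upscale_2(torch_img, model, tile_size=opts.SCUNET_tile, tile_overlap=opts.SCUNET_tile_overlap, scale=1, device=devices.get_device_for('scunet'), desc="ScuNET tiles").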