author     AUTOMATIC1111 <16777216c@gmail.com>  2023-12-14 06:39:57 +0000
committer  AUTOMATIC1111 <16777216c@gmail.com>  2023-12-14 07:14:19 +0000
commit     6ef0ff39f2a35a02e5380e522e1dff3eafd7ccfc (patch)
tree       a21b7f198fd345c363fb1abc4a23ffcd2c963ed8 /extensions-builtin/Lora/network_oft.py
parent     120a84bd2f01ec4489bd12bd68f319798ef30782 (diff)
Merge pull request #14300 from AUTOMATIC1111/oft_fixes
Fix wrong implementation in network_oft
Diffstat (limited to 'extensions-builtin/Lora/network_oft.py')
-rw-r--r--  extensions-builtin/Lora/network_oft.py  37
1 file changed, 11 insertions(+), 26 deletions(-)
diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py
index 05c37811..fa647020 100644
--- a/extensions-builtin/Lora/network_oft.py
+++ b/extensions-builtin/Lora/network_oft.py
@@ -21,6 +21,8 @@ class NetworkModuleOFT(network.NetworkModule):
         self.lin_module = None
         self.org_module: list[torch.Module] = [self.sd_module]
 
+        self.scale = 1.0
+
         # kohya-ss
         if "oft_blocks" in weights.w.keys():
             self.is_kohya = True
@@ -53,12 +55,18 @@ class NetworkModuleOFT(network.NetworkModule):
             self.constraint = None
             self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
 
-    def calc_updown_kb(self, orig_weight, multiplier):
+    def calc_updown(self, orig_weight):
         oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
-        oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
+        eye = torch.eye(self.block_size, device=self.oft_blocks.device)
+
+        if self.is_kohya:
+            block_Q = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
+            norm_Q = torch.norm(block_Q.flatten())
+            new_norm_Q = torch.clamp(norm_Q, max=self.constraint)
+            block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))
+            oft_blocks = torch.matmul(eye + block_Q, (eye - block_Q).float().inverse())
 
         R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
-        R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device)
 
         # This errors out for MultiheadAttention, might need to be handled up-stream
         merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
@@ -72,26 +80,3 @@ class NetworkModuleOFT(network.NetworkModule):
         updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight
         output_shape = orig_weight.shape
         return self.finalize_updown(updown, orig_weight, output_shape)
-
-    def calc_updown(self, orig_weight):
-        # if alpha is a very small number as in coft, calc_scale() will return a almost zero number so we ignore it
-        multiplier = self.multiplier()
-        return self.calc_updown_kb(orig_weight, multiplier)
-
-    # override to remove the multiplier/scale factor; it's already multiplied in get_weight
-    def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
-        if self.bias is not None:
-            updown = updown.reshape(self.bias.shape)
-            updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
-            updown = updown.reshape(output_shape)
-
-        if len(output_shape) == 4:
-            updown = updown.reshape(output_shape)
-
-        if orig_weight.size().numel() == updown.size().numel():
-            updown = updown.reshape(orig_weight.shape)
-
-        if ex_bias is not None:
-            ex_bias = ex_bias * self.multiplier()
-
-        return updown, ex_bias
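
The core of the fix is the Cayley transform: a skew-symmetric matrix Q (Q^T = -Q) maps to an orthogonal matrix R = (I + Q)(I - Q)^{-1}, with the norm of Q clamped to the constraint as in COFT. Below is a minimal, self-contained sketch of that construction outside the webui codebase; the function name cayley_orthogonal and the demo shapes/values are illustrative, not part of the commit.

    # Minimal sketch (not from the commit) of the block-orthogonal update built above.
    import torch

    def cayley_orthogonal(blocks: torch.Tensor, constraint: float) -> torch.Tensor:
        # blocks: (num_blocks, n, n) learned parameters -> orthogonal (num_blocks, n, n)
        Q = blocks - blocks.transpose(1, 2)               # skew-symmetric: Q^T = -Q
        norm_Q = torch.norm(Q.flatten())
        new_norm_Q = torch.clamp(norm_Q, max=constraint)  # COFT-style norm constraint
        Q = Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))   # rescale Q to satisfy it
        eye = torch.eye(Q.shape[1], device=Q.device)
        # Cayley transform: R = (I + Q)(I - Q)^-1 is orthogonal when Q is skew-symmetric
        return torch.matmul(eye + Q, torch.inverse(eye - Q))

    R = cayley_orthogonal(torch.randn(4, 8, 8) * 0.1, constraint=0.5)
    print(torch.dist(R[0] @ R[0].T, torch.eye(8)))  # ~0: each block is orthogonal

Because the Cayley product already yields a full orthogonal matrix rather than a residual to be scaled, the diff can drop the old R = R * multiplier + torch.eye(...) mixing along with the finalize_updown override that compensated for it.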