path: root/modules/sd_hijack_optimizations.py
author    AUTOMATIC1111 <16777216c@gmail.com>  2022-10-19 06:43:49 +0000
committer GitHub <noreply@github.com>          2022-10-19 06:43:49 +0000
commit    05315d8a236e252221bbbdd9e8f459b8a31c3524 (patch)
tree      0bce187060568747888571fafedca4974fe17af3 /modules/sd_hijack_optimizations.py
parent    9a33292ce41b01252cdb8ab6214a11d274e32fa0 (diff)
parent    1d4aa376e6111e90888a30ae24d2bcd7f978ec51 (diff)
Merge branch 'master' into hot-reload-javascript
Diffstat (limited to 'modules/sd_hijack_optimizations.py')
-rw-r--r--  modules/sd_hijack_optimizations.py  16
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 79405525..98123fbf 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -181,7 +181,7 @@ def einsum_op_cuda(q, k, v):
     mem_free_torch = mem_reserved - mem_active
     mem_free_total = mem_free_cuda + mem_free_torch
     # Divide factor of safety as there's copying and fragmentation
-    return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
+    return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
 
 def einsum_op(q, k, v):
     if q.device.type == 'cuda':
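
The first hunk is a straightforward bug fix: einsum_op_tensor_mem is a module-level function in this file, not a method, and einsum_op_cuda takes no self parameter, so the old call would have raised NameError: name 'self' is not defined as soon as the CUDA path ran. A minimal sketch of the corrected pattern follows; the memory probing mirrors the hunk's context, while einsum_op_tensor_mem is stubbed out here purely for illustration:

    import torch

    def einsum_op_tensor_mem(q, k, v, max_tensor_mb):
        # Stub for illustration: the real function in this file chunks the
        # attention einsum so no intermediate tensor exceeds max_tensor_mb.
        raise NotImplementedError

    def einsum_op_cuda(q, k, v):
        stats = torch.cuda.memory_stats(q.device)
        mem_active = stats['active_bytes.all.current']
        mem_reserved = stats['reserved_bytes.all.current']
        mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
        mem_free_torch = mem_reserved - mem_active
        mem_free_total = mem_free_cuda + mem_free_torch
        # Plain module-level call: there is no enclosing class, hence no `self.`
        return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))

The budget passed down is the free memory in MiB (1 << 20 bytes), divided by 3.3 as a safety factor against copies and fragmentation, per the comment in the hunk.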
@@ -296,10 +296,16 @@ def xformers_attnblock_forward(self, x):
     try:
         h_ = x
         h_ = self.norm(h_)
-        q1 = self.q(h_).contiguous()
-        k1 = self.k(h_).contiguous()
-        v = self.v(h_).contiguous()
-        out = xformers.ops.memory_efficient_attention(q1, k1, v)
+        q = self.q(h_)
+        k = self.k(h_)
+        v = self.v(h_)
+        b, c, h, w = q.shape
+        q, k, v = map(lambda t: rearrange(t, 'b c h w -> b (h w) c'), (q, k, v))
+        q = q.contiguous()
+        k = k.contiguous()
+        v = v.contiguous()
+        out = xformers.ops.memory_efficient_attention(q, k, v)
+        out = rearrange(out, 'b (h w) c -> b c h w', h=h)
         out = self.proj_out(out)
         return x + out
     except NotImplementedError:
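
The second hunk fixes shape handling: xformers.ops.memory_efficient_attention expects 3-D (batch, sequence, channels) tensors, while the q/k/v projections of this attention block are 1x1 convolutions producing 4-D (b, c, h, w) feature maps. The new code flattens the spatial grid into a sequence, makes the tensors contiguous as xformers requires, runs attention, and restores the spatial layout before proj_out. A self-contained sketch of that reshape-attend-restore pattern, with PyTorch's scaled_dot_product_attention (PyTorch >= 2.0) standing in for xformers so it runs without a GPU:

    import torch
    import torch.nn.functional as F
    from einops import rearrange

    def attnblock_attention_sketch(q, k, v):
        # q, k, v: (b, c, h, w) feature maps from 1x1 conv projections
        b, c, h, w = q.shape
        # Flatten the spatial grid into a sequence: (b, c, h, w) -> (b, h*w, c)
        q, k, v = (rearrange(t, 'b c h w -> b (h w) c').contiguous()
                   for t in (q, k, v))
        # Stand-in for xformers.ops.memory_efficient_attention(q, k, v);
        # both default to 1/sqrt(channels) scaling on 3-D inputs.
        out = F.scaled_dot_product_attention(q, k, v)
        # Restore the spatial layout for the conv projection that follows
        return rearrange(out, 'b (h w) c -> b c h w', h=h)

    q = k = v = torch.randn(1, 512, 64, 64)
    print(attnblock_attention_sketch(q, k, v).shape)  # torch.Size([1, 512, 64, 64])

The rearrange back needs only h (w is inferred from the sequence length), which is why the diff captures b, c, h, w before flattening.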