Add InvokeAI and lstein to credits, add back CUDA support

author: brkirch <brkirch@users.noreply.github.com> 2022-10-11 07:32:11 +0000
committer: AUTOMATIC1111 <16777216c@gmail.com> 2022-10-11 14:24:00 +0000
commit: 574c8e554a5371eca2cbf344764cb241c6ec4efc (patch)
tree: 61d10c398e4e7cd5d16ba249d7e6dc291b30dfcc /modules/sd_hijack_optimizations.py
parent: 98fd5cde72d5bda1620ab78416c7828fdc3dc10b (diff)
download: stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.tar.gz
stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.tar.bz2
stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.zip
1 files changed, 13 insertions, 0 deletions
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 2a4ac7e0..f006427f 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -173,7 +173,20 @@ def einsum_op_tensor_mem(q, k, v, max_tensor_mb):
         return einsum_op_slice_0(q, k, v, q.shape[0] // div)
     return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1))
 
+def einsum_op_cuda(q, k, v):
+    """Run einsum_op_tensor_mem on q, k, v with a size limit derived from memory currently free on q.device."""
+    stats = torch.cuda.memory_stats(q.device)
+    mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
+    # Bytes torch has reserved but is not actively using are counted as free as well.
+    mem_free_torch = stats['reserved_bytes.all.current'] - stats['active_bytes.all.current']
+    mem_free_total = mem_free_cuda + mem_free_torch
+    # Divide by a safety factor of 3.3 to allow for copying and fragmentation; 1 << 20 converts bytes to MiB.
+    return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
+
 def einsum_op(q, k, v):
+    if q.device.type == 'cuda':
+        return einsum_op_cuda(q, k, v)
+
     if q.device.type == 'mps':
         if mem_total_gb >= 32:
             return einsum_op_mps_v1(q, k, v)
author	brkirch <brkirch@users.noreply.github.com>	2022-10-11 07:32:11 +0000
committer	AUTOMATIC1111 <16777216c@gmail.com>	2022-10-11 14:24:00 +0000
commit	574c8e554a5371eca2cbf344764cb241c6ec4efc (patch)
tree	61d10c398e4e7cd5d16ba249d7e6dc291b30dfcc /modules/sd_hijack_optimizations.py
parent	98fd5cde72d5bda1620ab78416c7828fdc3dc10b (diff)
download	stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.tar.gz stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.tar.bz2 stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.zip