diff options
author | brkirch <brkirch@users.noreply.github.com> | 2022-10-11 07:32:11 +0000 |
---|---|---|
committer | AUTOMATIC1111 <16777216c@gmail.com> | 2022-10-11 14:24:00 +0000 |
commit | 574c8e554a5371eca2cbf344764cb241c6ec4efc (patch) | |
tree | 61d10c398e4e7cd5d16ba249d7e6dc291b30dfcc | |
parent | 98fd5cde72d5bda1620ab78416c7828fdc3dc10b (diff) | |
download | stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.tar.gz stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.tar.bz2 stable-diffusion-webui-gfx803-574c8e554a5371eca2cbf344764cb241c6ec4efc.zip |
Add InvokeAI and lstein to credits, add back CUDA support
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | modules/sd_hijack_optimizations.py | 13 |
2 files changed, 14 insertions, 0 deletions
@@ -123,6 +123,7 @@ The documentation was moved from this README over to the project's [wiki](https: - LDSR - https://github.com/Hafiidz/latent-diffusion
- Ideas for optimizations - https://github.com/basujindal/stable-diffusion
- Doggettx - Cross Attention layer optimization - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing.
+- InvokeAI, lstein - Cross Attention layer optimization - https://github.com/invoke-ai/InvokeAI (originally http://github.com/lstein/stable-diffusion)
- Rinon Gal - Textual Inversion - https://github.com/rinongal/textual_inversion (we're not using his code, but we are using his ideas).
- Idea for SD upscale - https://github.com/jquesnelle/txt2imghd
- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 2a4ac7e0..f006427f 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -173,7 +173,20 @@ def einsum_op_tensor_mem(q, k, v, max_tensor_mb): return einsum_op_slice_0(q, k, v, q.shape[0] // div)
return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1))
+def einsum_op_cuda(q, k, v):
+    stats = torch.cuda.memory_stats(q.device)
+    mem_active = stats['active_bytes.all.current']
+    mem_reserved = stats['reserved_bytes.all.current']
+    mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
+    mem_free_torch = mem_reserved - mem_active
+    mem_free_total = mem_free_cuda + mem_free_torch
+    # Divide by a safety factor (copying and fragmentation cost extra memory) and convert bytes to MiB
+    return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
+
def einsum_op(q, k, v):
+ if q.device.type == 'cuda':
+ return einsum_op_cuda(q, k, v)
+
if q.device.type == 'mps':
if mem_total_gb >= 32:
return einsum_op_mps_v1(q, k, v)
|