author    | AUTOMATIC1111 <16777216c@gmail.com> | 2023-08-28 12:03:46 +0000
committer | GitHub <noreply@github.com>         | 2023-08-28 12:03:46 +0000
commit    | 9e14cac3182b53173f1c356f2f13bd1cb0cedc89 (patch)
tree      | 12b4e04a8f17c4275c26bc23870cab0535e053e0 /modules/sd_hijack_optimizations.py
parent    | f898833ea38718e87b39ab090b2a2325638559cb (diff)
parent    | 8632452627e1341bcd447dbec3c1516f319200a0 (diff)
Merge branch 'dev' into patch-1
Diffstat (limited to 'modules/sd_hijack_optimizations.py')
-rw-r--r-- | modules/sd_hijack_optimizations.py | 13
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 0e810eec..7f9e328d 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import math
import psutil
+import platform
import torch
from torch import einsum
@@ -94,7 +95,10 @@ class SdOptimizationSdp(SdOptimizationSdpNoMem):
class SdOptimizationSubQuad(SdOptimization):
name = "sub-quadratic"
cmd_opt = "opt_sub_quad_attention"
- priority = 10
+
+ @property
+ def priority(self):
+ return 1000 if shared.device.type == 'mps' else 10
def apply(self):
ldm.modules.attention.CrossAttention.forward = sub_quad_attention_forward
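The registered SdOptimization subclasses appear to be ranked by their priority value, with the highest applicable one being applied, so turning the class attribute into a property lets sub-quadratic attention report 1000 only when shared.device is MPS and keep its old rank of 10 everywhere else. A minimal sketch of that kind of selection, with select_optimization as a hypothetical stand-in for the real dispatch code:

```python
# Minimal sketch (hypothetical names), not the actual webui dispatcher:
# the applicable optimization reporting the highest priority wins.
def select_optimization(candidates):
    applicable = [opt for opt in candidates if opt.is_available()]
    return max(applicable, key=lambda opt: opt.priority, default=None)
```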
@@ -120,7 +124,7 @@ class SdOptimizationInvokeAI(SdOptimization):
@property
def priority(self):
- return 1000 if not torch.cuda.is_available() else 10
+ return 1000 if shared.device.type != 'mps' and not torch.cuda.is_available() else 10
def apply(self):
ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_invokeAI
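The second hunk narrows the InvokeAI fallback: its priority now stays low on MPS even though CUDA is unavailable there, so the sub-quadratic path above wins on Macs while CPU-only setups keep preferring InvokeAI. A rough walk-through of the two expressions after this patch (helper names are made up for illustration):

```python
# Hypothetical walk-through of the two priority expressions after this patch.
def sub_quad_priority(device_type, cuda_available):
    return 1000 if device_type == 'mps' else 10

def invokeai_priority(device_type, cuda_available):
    return 1000 if device_type != 'mps' and not cuda_available else 10

for device_type, cuda in [('cuda', True), ('cpu', False), ('mps', False)]:
    print(device_type, sub_quad_priority(device_type, cuda), invokeai_priority(device_type, cuda))
# cuda -> 10 / 10, cpu -> 10 / 1000, mps -> 1000 / 10
```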
@@ -427,7 +431,10 @@ def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_
qk_matmul_size_bytes = batch_x_heads * bytes_per_token * q_tokens * k_tokens
if chunk_threshold is None:
- chunk_threshold_bytes = int(get_available_vram() * 0.9) if q.device.type == 'mps' else int(get_available_vram() * 0.7)
+ if q.device.type == 'mps':
+ chunk_threshold_bytes = 268435456 * (2 if platform.processor() == 'i386' else bytes_per_token)
+ else:
+ chunk_threshold_bytes = int(get_available_vram() * 0.7)
elif chunk_threshold == 0:
chunk_threshold_bytes = None
else:
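The last hunk replaces the MPS share-of-VRAM estimate (previously 0.9 * get_available_vram()) with a fixed budget: 268435456 bytes is 256 MiB, scaled by 2 on Intel Macs (platform.processor() == 'i386') or by bytes_per_token otherwise. Assuming bytes_per_token is the element size of q's dtype as computed earlier in sub_quad_attention (2 for float16, 4 for float32; not shown in this hunk), the arithmetic works out as below:

```python
# Hedged sketch of the new MPS default threshold; bytes_per_token is
# assumed to be the element size of q's dtype (e.g. 2 for float16),
# as computed earlier in sub_quad_attention (not shown in this hunk).
import platform

def mps_chunk_threshold_bytes(bytes_per_token: int) -> int:
    base = 268435456                      # 256 MiB
    if platform.processor() == 'i386':    # Intel Mac
        return base * 2                   # 512 MiB
    return base * bytes_per_token         # 512 MiB for float16, 1 GiB for float32

print(mps_chunk_threshold_bytes(2))  # 536870912
```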