diff options
author | Pam <pamhome21@gmail.com> | 2023-03-10 07:19:36 +0000 |
---|---|---|
committer | Pam <pamhome21@gmail.com> | 2023-03-10 07:19:36 +0000 |
commit | 37acba263389e22bc46cfffc80b2ca8b76a85287 (patch) | |
tree | 67dd2e6d1749b44b28dcb0d73f5ecf945493f245 /modules | |
parent | fec0a895119a124a295e3dad5205de5766031dc7 (diff) | |
download | stable-diffusion-webui-gfx803-37acba263389e22bc46cfffc80b2ca8b76a85287.tar.gz stable-diffusion-webui-gfx803-37acba263389e22bc46cfffc80b2ca8b76a85287.tar.bz2 stable-diffusion-webui-gfx803-37acba263389e22bc46cfffc80b2ca8b76a85287.zip |
argument to disable memory efficient for sdp
Diffstat (limited to 'modules')
-rw-r--r-- | modules/sd_hijack.py | 11 | ||||
-rw-r--r-- | modules/sd_hijack_optimizations.py | 4 | ||||
-rw-r--r-- | modules/shared.py | 1 |
3 files changed, 13 insertions, 3 deletions
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 76cb9120..f62e9adb 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -43,9 +43,14 @@ def apply_optimizations():
ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward
optimization_method = 'xformers'
elif cmd_opts.opt_sdp_attention and (hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(getattr(torch.nn.functional, "scaled_dot_product_attention"))):
- print("Applying scaled dot product cross attention optimization.")
- ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.scaled_dot_product_attention_forward
- optimization_method = 'sdp'
+ if cmd_opts.opt_sdp_no_mem_attention:
+ print("Applying scaled dot product cross attention optimization (without memory efficient attention).")
+ ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.scaled_dot_product_no_mem_attention_forward
+ optimization_method = 'sdp-no-mem'
+ else:
+ print("Applying scaled dot product cross attention optimization.")
+ ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.scaled_dot_product_attention_forward
+ optimization_method = 'sdp'
elif cmd_opts.opt_sub_quad_attention:
print("Applying sub-quadratic cross attention optimization.")
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.sub_quad_attention_forward
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index a324a592..68b1dd84 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -388,6 +388,10 @@ def scaled_dot_product_attention_forward(self, x, context=None, mask=None):
hidden_states = self.to_out[1](hidden_states)
return hidden_states
+def scaled_dot_product_no_mem_attention_forward(self, x, context=None, mask=None):
+ with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=False):
+ return scaled_dot_product_attention_forward(self, x, context, mask)
+
def cross_attention_attnblock_forward(self, x):
h_ = x
h_ = self.norm(h_)
diff --git a/modules/shared.py b/modules/shared.py
index 12d0756b..4b81c591 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -70,6 +70,7 @@ parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the percentage
parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.")
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
parser.add_argument("--opt-sdp-attention", action='store_true', help="enable scaled dot product cross-attention layer optimization; requires PyTorch 2.*")
+parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="disables memory efficient sdp, makes image generation deterministic; requires --opt-sdp-attention")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
|