Merge pull request #6055 from brkirch/sub-quad_attn_opt

Add Birch-san's sub-quadratic attention implementation
author: AUTOMATIC1111 <16777216c@gmail.com> 2023-01-07 09:26:55 +0000
committer: GitHub <noreply@github.com> 2023-01-07 09:26:55 +0000
commit: c295e4a2446bcc2663f497ba8afa14cec80de332 (patch)
tree: 606ede9bd1bf0c13b59c26a63755a2f95f6b8da6 /modules/shared.py
parent: 1a5b86ad65fd738eadea1ad72f4abad3a4aabf17 (diff)
parent: c18add68ef7d2de3617cbbaff864b0c74cfdf6c0 (diff)
download: stable-diffusion-webui-gfx803-c295e4a2446bcc2663f497ba8afa14cec80de332.tar.gz stable-diffusion-webui-gfx803-c295e4a2446bcc2663f497ba8afa14cec80de332.tar.bz2 stable-diffusion-webui-gfx803-c295e4a2446bcc2663f497ba8afa14cec80de332.zip
1 file changed, 4 insertions, 0 deletions
diff --git a/modules/shared.py b/modules/shared.py
index 865c3c07..a6712dae 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -56,6 +56,10 @@ parser.add_argument("--xformers", action='store_true', help="enable xformers for
 parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
 parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything")
 parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.")
+parser.add_argument("--opt-sub-quad-attention", action='store_true', help="enable memory efficient sub-quadratic cross-attention layer optimization")
+parser.add_argument("--sub-quad-q-chunk-size", type=int, help="query chunk size for the sub-quadratic cross-attention layer optimization to use", default=1024)
+parser.add_argument("--sub-quad-kv-chunk-size", type=int, help="kv chunk size for the sub-quadratic cross-attention layer optimization to use", default=None)
+parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the percentage of VRAM threshold for the sub-quadratic cross-attention layer optimization to use chunking", default=None)
 parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.")
 parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
 parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
author	AUTOMATIC1111 <16777216c@gmail.com>	2023-01-07 09:26:55 +0000
committer	GitHub <noreply@github.com>	2023-01-07 09:26:55 +0000
commit	c295e4a2446bcc2663f497ba8afa14cec80de332 (patch)
tree	606ede9bd1bf0c13b59c26a63755a2f95f6b8da6 /modules/shared.py
parent	1a5b86ad65fd738eadea1ad72f4abad3a4aabf17 (diff)
parent	c18add68ef7d2de3617cbbaff864b0c74cfdf6c0 (diff)
download	stable-diffusion-webui-gfx803-c295e4a2446bcc2663f497ba8afa14cec80de332.tar.gz stable-diffusion-webui-gfx803-c295e4a2446bcc2663f497ba8afa14cec80de332.tar.bz2 stable-diffusion-webui-gfx803-c295e4a2446bcc2663f497ba8afa14cec80de332.zip