From b5253f0dab529707f1fe2e11211a10ce2f264617 Mon Sep 17 00:00:00 2001 From: noodleanon <122053346+noodleanon@users.noreply.github.com> Date: Thu, 5 Jan 2023 21:21:48 +0000 Subject: allow img2img api to run scripts --- modules/api/api.py | 27 ++++++++++++++++++++++++--- modules/api/models.py | 2 +- modules/processing.py | 4 ++-- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'modules') diff --git a/modules/api/api.py b/modules/api/api.py index 2103709b..aa62a42e 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -11,7 +11,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials from secrets import compare_digest import modules.shared as shared -from modules import sd_samplers, deepbooru, sd_hijack, images +from modules import sd_samplers, deepbooru, sd_hijack, images, scripts, ui from modules.api.models import * from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images from modules.extras import run_extras @@ -28,8 +28,13 @@ def upscaler_to_index(name: str): try: return [x.name.lower() for x in shared.sd_upscalers].index(name.lower()) except: - raise HTTPException(status_code=400, detail=f"Invalid upscaler, needs to be on of these: {' , '.join([x.name for x in sd_upscalers])}") + raise HTTPException(status_code=400, detail=f"Invalid upscaler, needs to be one of these: {' , '.join([x.name for x in sd_upscalers])}") +def script_name_to_index(name, scripts): + try: + return [script.title().lower() for script in scripts].index(name.lower()) + except: + raise HTTPException(status_code=422, detail=f"Script '{name}' not found") def validate_sampler_name(name): config = sd_samplers.all_samplers_map.get(name, None) @@ -170,6 +175,14 @@ class Api: if init_images is None: raise HTTPException(status_code=404, detail="Init image not found") + if img2imgreq.script_name is not None: + if scripts.scripts_img2img.scripts == []: + scripts.scripts_img2img.initialize_scripts(True) + ui.create_ui() + + script_idx = script_name_to_index(img2imgreq.script_name, scripts.scripts_img2img.selectable_scripts) + script = scripts.scripts_img2img.selectable_scripts[script_idx] + mask = img2imgreq.mask if mask: mask = decode_base64_to_image(mask) @@ -186,13 +199,21 @@ class Api: args = vars(populate) args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine. 
+ args.pop('script_name', None) with self.queue_lock: p = StableDiffusionProcessingImg2Img(sd_model=shared.sd_model, **args) p.init_images = [decode_base64_to_image(x) for x in init_images] shared.state.begin() - processed = process_images(p) + if 'script' in locals(): + p.outpath_grids = opts.outdir_img2img_grids + p.outpath_samples = opts.outdir_img2img_samples + p.script_args = [script_idx + 1] + [None] * (script.args_from - 1) + p.script_args + processed = scripts.scripts_img2img.run(p, *p.script_args) + else: + processed = process_images(p) + shared.state.end() b64images = list(map(encode_pil_to_base64, processed.images)) diff --git a/modules/api/models.py b/modules/api/models.py index d8198a27..862477e7 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -106,7 +106,7 @@ StableDiffusionTxt2ImgProcessingAPI = PydanticModelGenerator( StableDiffusionImg2ImgProcessingAPI = PydanticModelGenerator( "StableDiffusionProcessingImg2Img", StableDiffusionProcessingImg2Img, - [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "init_images", "type": list, "default": None}, {"key": "denoising_strength", "type": float, "default": 0.75}, {"key": "mask", "type": str, "default": None}, {"key": "include_init_images", "type": bool, "default": False, "exclude" : True}] + [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "init_images", "type": list, "default": None}, {"key": "denoising_strength", "type": float, "default": 0.75}, {"key": "mask", "type": str, "default": None}, {"key": "include_init_images", "type": bool, "default": False, "exclude" : True}, {"key": "script_name", "type": str, "default": None}, {"key": "script_args", "type": list, "default": []}] ).generate_model() class TextToImageResponse(BaseModel): diff --git a/modules/processing.py b/modules/processing.py index a408d622..d5ac7eb1 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -98,7 +98,7 @@ class StableDiffusionProcessing(): """ The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing """ - def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None): + def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: 
bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None): if sampler_index is not None: print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr) @@ -149,7 +149,7 @@ class StableDiffusionProcessing(): self.seed_resize_from_w = 0 self.scripts = None - self.script_args = None + self.script_args = script_args self.all_prompts = None self.all_negative_prompts = None self.all_seeds = None -- cgit v1.2.3 From d782a95967c9eea753df3333cd1954b6ec73eba0 Mon Sep 17 00:00:00 2001 From: brkirch Date: Tue, 27 Dec 2022 08:50:55 -0500 Subject: Add Birch-san's sub-quadratic attention implementation --- modules/sd_hijack.py | 15 ++- modules/sd_hijack_optimizations.py | 124 ++++++++++++++++++----- modules/shared.py | 4 + modules/sub_quadratic_attention.py | 201 +++++++++++++++++++++++++++++++++++++ 4 files changed, 310 insertions(+), 34 deletions(-) create mode 100644 modules/sub_quadratic_attention.py (limited to 'modules') diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 690a9ec2..019a6f3f 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -7,8 +7,6 @@ from modules.hypernetworks import hypernetwork from modules.shared import cmd_opts from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_unet -from modules.sd_hijack_optimizations import invokeAI_mps_available - import ldm.modules.attention import ldm.modules.diffusionmodules.model import ldm.modules.diffusionmodules.openaimodel @@ -40,17 +38,16 @@ def apply_optimizations(): print("Applying xformers cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward + elif cmd_opts.opt_sub_quad_attention: + print("Applying sub-quadratic cross attention optimization.") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.sub_quad_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.sub_quad_attnblock_forward elif cmd_opts.opt_split_attention_v1: print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): - if not invokeAI_mps_available and shared.device.type == 'mps': - print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") - print("Applying v1 cross attention optimization.") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 - else: - print("Applying cross attention optimization (InvokeAI).") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI + print("Applying cross attention optimization (InvokeAI).") + ldm.modules.attention.CrossAttention.forward = 
sd_hijack_optimizations.split_cross_attention_forward_invokeAI elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()): print("Applying cross attention optimization (Doggettx).") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 02c87f40..f5c153e8 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -1,7 +1,7 @@ import math import sys import traceback -import importlib +import psutil import torch from torch import einsum @@ -12,6 +12,8 @@ from einops import rearrange from modules import shared from modules.hypernetworks import hypernetwork +from .sub_quadratic_attention import efficient_dot_product_attention + if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers: try: @@ -22,6 +24,19 @@ if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers: print(traceback.format_exc(), file=sys.stderr) +def get_available_vram(): + if shared.device.type == 'cuda': + stats = torch.cuda.memory_stats(shared.device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device()) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + return mem_free_total + else: + return psutil.virtual_memory().available + + # see https://github.com/basujindal/stable-diffusion/pull/117 for discussion def split_cross_attention_forward_v1(self, x, context=None, mask=None): h = self.heads @@ -76,12 +91,7 @@ def split_cross_attention_forward(self, x, context=None, mask=None): r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) - stats = torch.cuda.memory_stats(q.device) - mem_active = stats['active_bytes.all.current'] - mem_reserved = stats['reserved_bytes.all.current'] - mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device()) - mem_free_torch = mem_reserved - mem_active - mem_free_total = mem_free_cuda + mem_free_torch + mem_free_total = get_available_vram() gb = 1024 ** 3 tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() @@ -118,19 +128,8 @@ def split_cross_attention_forward(self, x, context=None, mask=None): return self.to_out(r2) -def check_for_psutil(): - try: - spec = importlib.util.find_spec('psutil') - return spec is not None - except ModuleNotFoundError: - return False - -invokeAI_mps_available = check_for_psutil() - # -- Taken from https://github.com/invoke-ai/InvokeAI and modified -- -if invokeAI_mps_available: - import psutil - mem_total_gb = psutil.virtual_memory().total // (1 << 30) +mem_total_gb = psutil.virtual_memory().total // (1 << 30) def einsum_op_compvis(q, k, v): s = einsum('b i d, b j d -> b i j', q, k) @@ -215,6 +214,70 @@ def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): # -- End of code from https://github.com/invoke-ai/InvokeAI -- + +# Based on Birch-san's modified implementation of sub-quadratic attention from https://github.com/Birch-san/diffusers/pull/1 +def sub_quad_attention_forward(self, x, context=None, mask=None): + assert mask is None, "attention-mask not currently implemented for SubQuadraticCrossAttnProcessor." 
+ + h = self.heads + + q = self.to_q(x) + context = default(context, x) + + context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context) + k = self.to_k(context_k) + v = self.to_v(context_v) + del context, context_k, context_v, x + + q = q.unflatten(-1, (h, -1)).transpose(1,2).flatten(end_dim=1) + k = k.unflatten(-1, (h, -1)).transpose(1,2).flatten(end_dim=1) + v = v.unflatten(-1, (h, -1)).transpose(1,2).flatten(end_dim=1) + + x = sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold_bytes=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=self.training) + + x = x.unflatten(0, (-1, h)).transpose(1,2).flatten(start_dim=2) + + out_proj, dropout = self.to_out + x = out_proj(x) + x = dropout(x) + + return x + +def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_size_min=None, chunk_threshold_bytes=None, use_checkpoint=True): + bytes_per_token = torch.finfo(q.dtype).bits//8 + batch_x_heads, q_tokens, _ = q.shape + _, k_tokens, _ = k.shape + qk_matmul_size_bytes = batch_x_heads * bytes_per_token * q_tokens * k_tokens + + available_vram = int(get_available_vram() * 0.9) if q.device.type == 'mps' else int(get_available_vram() * 0.7) + + if chunk_threshold_bytes is None: + chunk_threshold_bytes = available_vram + elif chunk_threshold_bytes == 0: + chunk_threshold_bytes = None + + if kv_chunk_size_min is None: + kv_chunk_size_min = chunk_threshold_bytes // (batch_x_heads * bytes_per_token * (k.shape[2] + v.shape[2])) + elif kv_chunk_size_min == 0: + kv_chunk_size_min = None + + if chunk_threshold_bytes is not None and qk_matmul_size_bytes <= chunk_threshold_bytes: + # the big matmul fits into our memory limit; do everything in 1 chunk, + # i.e. 
send it down the unchunked fast-path + query_chunk_size = q_tokens + kv_chunk_size = k_tokens + + return efficient_dot_product_attention( + q, + k, + v, + query_chunk_size=q_chunk_size, + kv_chunk_size=kv_chunk_size, + kv_chunk_size_min = kv_chunk_size_min, + use_checkpoint=use_checkpoint, + ) + + def xformers_attention_forward(self, x, context=None, mask=None): h = self.heads q_in = self.to_q(x) @@ -252,12 +315,7 @@ def cross_attention_attnblock_forward(self, x): h_ = torch.zeros_like(k, device=q.device) - stats = torch.cuda.memory_stats(q.device) - mem_active = stats['active_bytes.all.current'] - mem_reserved = stats['reserved_bytes.all.current'] - mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device()) - mem_free_torch = mem_reserved - mem_active - mem_free_total = mem_free_cuda + mem_free_torch + mem_free_total = get_available_vram() tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size() mem_required = tensor_size * 2.5 @@ -312,3 +370,19 @@ def xformers_attnblock_forward(self, x): return x + out except NotImplementedError: return cross_attention_attnblock_forward(self, x) + +def sub_quad_attnblock_forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + b, c, h, w = q.shape + q, k, v = map(lambda t: rearrange(t, 'b c h w -> b (h w) c'), (q, k, v)) + q = q.contiguous() + k = k.contiguous() + v = v.contiguous() + out = sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold_bytes=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=self.training) + out = rearrange(out, 'b (h w) c -> b c h w', h=h) + out = self.proj_out(out) + return x + out diff --git a/modules/shared.py b/modules/shared.py index d4ddeea0..487a7792 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,6 +56,10 @@ parser.add_argument("--xformers", action='store_true', help="enable xformers for parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work") parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything") parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.") +parser.add_argument("--opt-sub-quad-attention", action='store_true', help="enable memory efficient sub-quadratic cross-attention layer optimization") +parser.add_argument("--sub-quad-q-chunk-size", type=int, help="query chunk size for the sub-quadratic cross-attention layer optimization to use", default=1024) +parser.add_argument("--sub-quad-kv-chunk-size", type=int, help="kv chunk size for the sub-quadratic cross-attention layer optimization to use", default=None) +parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the size threshold in bytes for the sub-quadratic cross-attention layer optimization to use chunking", default=None) parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. 
By default, it's on when cuda is unavailable.") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") diff --git a/modules/sub_quadratic_attention.py b/modules/sub_quadratic_attention.py new file mode 100644 index 00000000..b11dc1c7 --- /dev/null +++ b/modules/sub_quadratic_attention.py @@ -0,0 +1,201 @@ +# original source: +# https://github.com/AminRezaei0x443/memory-efficient-attention/blob/1bc0d9e6ac5f82ea43a375135c4e1d3896ee1694/memory_efficient_attention/attention_torch.py +# license: +# unspecified +# credit: +# Amin Rezaei (original author) +# Alex Birch (optimized algorithm for 3D tensors, at the expense of removing bias, masking and callbacks) +# implementation of: +# Self-attention Does Not Need O(n2) Memory": +# https://arxiv.org/abs/2112.05682v2 + +from functools import partial +import torch +from torch import Tensor +from torch.utils.checkpoint import checkpoint +import math +from typing import Optional, NamedTuple, Protocol, List + +def dynamic_slice( + x: Tensor, + starts: List[int], + sizes: List[int], +) -> Tensor: + slicing = [slice(start, start + size) for start, size in zip(starts, sizes)] + return x[slicing] + +class AttnChunk(NamedTuple): + exp_values: Tensor + exp_weights_sum: Tensor + max_score: Tensor + +class SummarizeChunk(Protocol): + @staticmethod + def __call__( + query: Tensor, + key: Tensor, + value: Tensor, + ) -> AttnChunk: ... + +class ComputeQueryChunkAttn(Protocol): + @staticmethod + def __call__( + query: Tensor, + key: Tensor, + value: Tensor, + ) -> Tensor: ... 
+ +def _summarize_chunk( + query: Tensor, + key: Tensor, + value: Tensor, + scale: float, +) -> AttnChunk: + attn_weights = torch.baddbmm( + torch.empty(1, 1, 1, device=query.device, dtype=query.dtype), + query, + key.transpose(1,2), + alpha=scale, + beta=0, + ) + max_score, _ = torch.max(attn_weights, -1, keepdim=True) + max_score = max_score.detach() + exp_weights = torch.exp(attn_weights - max_score) + exp_values = torch.bmm(exp_weights, value) + max_score = max_score.squeeze(-1) + return AttnChunk(exp_values, exp_weights.sum(dim=-1), max_score) + +def _query_chunk_attention( + query: Tensor, + key: Tensor, + value: Tensor, + summarize_chunk: SummarizeChunk, + kv_chunk_size: int, +) -> Tensor: + batch_x_heads, k_tokens, k_channels_per_head = key.shape + _, _, v_channels_per_head = value.shape + + def chunk_scanner(chunk_idx: int) -> AttnChunk: + key_chunk = dynamic_slice( + key, + (0, chunk_idx, 0), + (batch_x_heads, kv_chunk_size, k_channels_per_head) + ) + value_chunk = dynamic_slice( + value, + (0, chunk_idx, 0), + (batch_x_heads, kv_chunk_size, v_channels_per_head) + ) + return summarize_chunk(query, key_chunk, value_chunk) + + chunks: List[AttnChunk] = [ + chunk_scanner(chunk) for chunk in torch.arange(0, k_tokens, kv_chunk_size) + ] + acc_chunk = AttnChunk(*map(torch.stack, zip(*chunks))) + chunk_values, chunk_weights, chunk_max = acc_chunk + + global_max, _ = torch.max(chunk_max, 0, keepdim=True) + max_diffs = torch.exp(chunk_max - global_max) + chunk_values *= torch.unsqueeze(max_diffs, -1) + chunk_weights *= max_diffs + + all_values = chunk_values.sum(dim=0) + all_weights = torch.unsqueeze(chunk_weights, -1).sum(dim=0) + return all_values / all_weights + +# TODO: refactor CrossAttention#get_attention_scores to share code with this +def _get_attention_scores_no_kv_chunking( + query: Tensor, + key: Tensor, + value: Tensor, + scale: float, +) -> Tensor: + attn_scores = torch.baddbmm( + torch.empty(1, 1, 1, device=query.device, dtype=query.dtype), + query, + key.transpose(1,2), + alpha=scale, + beta=0, + ) + attn_probs = attn_scores.softmax(dim=-1) + del attn_scores + hidden_states_slice = torch.bmm(attn_probs, value) + return hidden_states_slice + +class ScannedChunk(NamedTuple): + chunk_idx: int + attn_chunk: AttnChunk + +def efficient_dot_product_attention( + query: Tensor, + key: Tensor, + value: Tensor, + query_chunk_size=1024, + kv_chunk_size: Optional[int] = None, + kv_chunk_size_min: Optional[int] = None, + use_checkpoint=True, +): + """Computes efficient dot-product attention given query, key, and value. + This is efficient version of attention presented in + https://arxiv.org/abs/2112.05682v2 which comes with O(sqrt(n)) memory requirements. + Args: + query: queries for calculating attention with shape of + `[batch * num_heads, tokens, channels_per_head]`. + key: keys for calculating attention with shape of + `[batch * num_heads, tokens, channels_per_head]`. + value: values to be used in attention with shape of + `[batch * num_heads, tokens, channels_per_head]`. + query_chunk_size: int: query chunks size + kv_chunk_size: Optional[int]: key/value chunks size. if None: defaults to sqrt(key_tokens) + kv_chunk_size_min: Optional[int]: key/value minimum chunk size. only considered when kv_chunk_size is None. changes `sqrt(key_tokens)` into `max(sqrt(key_tokens), kv_chunk_size_min)`, to ensure our chunk sizes don't get too small (smaller chunks = more chunks = less concurrent work done). 
+ use_checkpoint: bool: whether to use checkpointing (recommended True for training, False for inference) + Returns: + Output of shape `[batch * num_heads, query_tokens, channels_per_head]`. + """ + batch_x_heads, q_tokens, q_channels_per_head = query.shape + _, k_tokens, _ = key.shape + scale = q_channels_per_head ** -0.5 + + kv_chunk_size = min(kv_chunk_size or int(math.sqrt(k_tokens)), k_tokens) + if kv_chunk_size_min is not None: + kv_chunk_size = max(kv_chunk_size, kv_chunk_size_min) + + def get_query_chunk(chunk_idx: int) -> Tensor: + return dynamic_slice( + query, + (0, chunk_idx, 0), + (batch_x_heads, min(query_chunk_size, q_tokens), q_channels_per_head) + ) + + summarize_chunk: SummarizeChunk = partial(_summarize_chunk, scale=scale) + summarize_chunk: SummarizeChunk = partial(checkpoint, summarize_chunk) if use_checkpoint else summarize_chunk + compute_query_chunk_attn: ComputeQueryChunkAttn = partial( + _get_attention_scores_no_kv_chunking, + scale=scale + ) if k_tokens <= kv_chunk_size else ( + # fast-path for when there's just 1 key-value chunk per query chunk (this is just sliced attention btw) + partial( + _query_chunk_attention, + kv_chunk_size=kv_chunk_size, + summarize_chunk=summarize_chunk, + ) + ) + + if q_tokens <= query_chunk_size: + # fast-path for when there's just 1 query chunk + return compute_query_chunk_attn( + query=query, + key=key, + value=value, + ) + + # TODO: maybe we should use torch.empty_like(query) to allocate storage in-advance, + # and pass slices to be mutated, instead of torch.cat()ing the returned slices + res = torch.cat([ + compute_query_chunk_attn( + query=get_query_chunk(i * query_chunk_size), + key=key, + value=value, + ) for i in range(math.ceil(q_tokens / query_chunk_size)) + ], dim=1) + return res -- cgit v1.2.3 From b119815333026164f2bd7d1ca71f3e4f7a9afd0d Mon Sep 17 00:00:00 2001 From: brkirch Date: Thu, 5 Jan 2023 04:37:17 -0500 Subject: Use narrow instead of dynamic_slice --- modules/sub_quadratic_attention.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'modules') diff --git a/modules/sub_quadratic_attention.py b/modules/sub_quadratic_attention.py index b11dc1c7..95924d24 100644 --- a/modules/sub_quadratic_attention.py +++ b/modules/sub_quadratic_attention.py @@ -5,6 +5,7 @@ # credit: # Amin Rezaei (original author) # Alex Birch (optimized algorithm for 3D tensors, at the expense of removing bias, masking and callbacks) +# brkirch (modified to use torch.narrow instead of dynamic_slice implementation) # implementation of: # Self-attention Does Not Need O(n2) Memory": # https://arxiv.org/abs/2112.05682v2 @@ -16,13 +17,13 @@ from torch.utils.checkpoint import checkpoint import math from typing import Optional, NamedTuple, Protocol, List -def dynamic_slice( - x: Tensor, - starts: List[int], - sizes: List[int], +def narrow_trunc( + input: Tensor, + dim: int, + start: int, + length: int ) -> Tensor: - slicing = [slice(start, start + size) for start, size in zip(starts, sizes)] - return x[slicing] + return torch.narrow(input, dim, start, length if input.shape[dim] >= start + length else input.shape[dim] - start) class AttnChunk(NamedTuple): exp_values: Tensor @@ -76,15 +77,17 @@ def _query_chunk_attention( _, _, v_channels_per_head = value.shape def chunk_scanner(chunk_idx: int) -> AttnChunk: - key_chunk = dynamic_slice( + key_chunk = narrow_trunc( key, - (0, chunk_idx, 0), - (batch_x_heads, kv_chunk_size, k_channels_per_head) + 1, + chunk_idx, + kv_chunk_size ) - value_chunk = 
dynamic_slice( + value_chunk = narrow_trunc( value, - (0, chunk_idx, 0), - (batch_x_heads, kv_chunk_size, v_channels_per_head) + 1, + chunk_idx, + kv_chunk_size ) return summarize_chunk(query, key_chunk, value_chunk) @@ -161,10 +164,11 @@ def efficient_dot_product_attention( kv_chunk_size = max(kv_chunk_size, kv_chunk_size_min) def get_query_chunk(chunk_idx: int) -> Tensor: - return dynamic_slice( + return narrow_trunc( query, - (0, chunk_idx, 0), - (batch_x_heads, min(query_chunk_size, q_tokens), q_channels_per_head) + 1, + chunk_idx, + min(query_chunk_size, q_tokens) ) summarize_chunk: SummarizeChunk = partial(_summarize_chunk, scale=scale) -- cgit v1.2.3 From b95a4c0ce5ab9c414e0494193bfff665f45e9e65 Mon Sep 17 00:00:00 2001 From: brkirch Date: Fri, 6 Jan 2023 01:01:51 -0500 Subject: Change sub-quad chunk threshold to use percentage --- modules/sd_hijack_optimizations.py | 18 +++++++++--------- modules/shared.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'modules') diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index f5c153e8..b416e9ac 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -233,7 +233,7 @@ def sub_quad_attention_forward(self, x, context=None, mask=None): k = k.unflatten(-1, (h, -1)).transpose(1,2).flatten(end_dim=1) v = v.unflatten(-1, (h, -1)).transpose(1,2).flatten(end_dim=1) - x = sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold_bytes=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=self.training) + x = sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=self.training) x = x.unflatten(0, (-1, h)).transpose(1,2).flatten(start_dim=2) @@ -243,20 +243,20 @@ def sub_quad_attention_forward(self, x, context=None, mask=None): return x -def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_size_min=None, chunk_threshold_bytes=None, use_checkpoint=True): +def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_size_min=None, chunk_threshold=None, use_checkpoint=True): bytes_per_token = torch.finfo(q.dtype).bits//8 batch_x_heads, q_tokens, _ = q.shape _, k_tokens, _ = k.shape qk_matmul_size_bytes = batch_x_heads * bytes_per_token * q_tokens * k_tokens - available_vram = int(get_available_vram() * 0.9) if q.device.type == 'mps' else int(get_available_vram() * 0.7) - - if chunk_threshold_bytes is None: - chunk_threshold_bytes = available_vram - elif chunk_threshold_bytes == 0: + if chunk_threshold is None: + chunk_threshold_bytes = int(get_available_vram() * 0.9) if q.device.type == 'mps' else int(get_available_vram() * 0.7) + elif chunk_threshold == 0: chunk_threshold_bytes = None + else: + chunk_threshold_bytes = int(0.01 * chunk_threshold * get_available_vram()) - if kv_chunk_size_min is None: + if kv_chunk_size_min is None and chunk_threshold_bytes is not None: kv_chunk_size_min = chunk_threshold_bytes // (batch_x_heads * bytes_per_token * (k.shape[2] + v.shape[2])) elif kv_chunk_size_min == 0: kv_chunk_size_min = None @@ -382,7 +382,7 @@ def sub_quad_attnblock_forward(self, x): q = q.contiguous() k = k.contiguous() v = v.contiguous() - out = sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, 
kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold_bytes=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=self.training) + out = sub_quad_attention(q, k, v, q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size, kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size, chunk_threshold=shared.cmd_opts.sub_quad_chunk_threshold, use_checkpoint=self.training) out = rearrange(out, 'b (h w) c -> b c h w', h=h) out = self.proj_out(out) return x + out diff --git a/modules/shared.py b/modules/shared.py index cb1dc312..d7a81db1 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -59,7 +59,7 @@ parser.add_argument("--opt-split-attention", action='store_true', help="force-en parser.add_argument("--opt-sub-quad-attention", action='store_true', help="enable memory efficient sub-quadratic cross-attention layer optimization") parser.add_argument("--sub-quad-q-chunk-size", type=int, help="query chunk size for the sub-quadratic cross-attention layer optimization to use", default=1024) parser.add_argument("--sub-quad-kv-chunk-size", type=int, help="kv chunk size for the sub-quadratic cross-attention layer optimization to use", default=None) -parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the size threshold in bytes for the sub-quadratic cross-attention layer optimization to use chunking", default=None) +parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the percentage of VRAM threshold for the sub-quadratic cross-attention layer optimization to use chunking", default=None) parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") -- cgit v1.2.3 From 5deb2a19ccea57a50252e8fcb07b4d17c6599def Mon Sep 17 00:00:00 2001 From: brkirch Date: Fri, 6 Jan 2023 01:33:15 -0500 Subject: Allow Doggettx's cross attention opt without CUDA --- modules/sd_hijack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules') diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index ef25dadb..bd101e5b 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -50,7 +50,7 @@ def apply_optimizations(): print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 optimization_method = 'V1' - elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): + elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not cmd_opts.opt_split_attention and not torch.cuda.is_available()): print("Applying cross attention optimization (InvokeAI).") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI optimization_method = 'InvokeAI' -- cgit v1.2.3 From c18add68ef7d2de3617cbbaff864b0c74cfdf6c0 Mon Sep 17 00:00:00 2001 From: brkirch Date: Fri, 6 Jan 2023 16:42:47 -0500 Subject: Added license --- modules/sd_hijack_optimizations.py | 1 + modules/sub_quadratic_attention.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'modules') diff --git 
a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index b416e9ac..cdc63ed7 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -216,6 +216,7 @@ def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): # Based on Birch-san's modified implementation of sub-quadratic attention from https://github.com/Birch-san/diffusers/pull/1 +# The sub_quad_attention_forward function is under the MIT License listed under Memory Efficient Attention in the Licenses section of the web UI interface def sub_quad_attention_forward(self, x, context=None, mask=None): assert mask is None, "attention-mask not currently implemented for SubQuadraticCrossAttnProcessor." diff --git a/modules/sub_quadratic_attention.py b/modules/sub_quadratic_attention.py index 95924d24..fea7aaac 100644 --- a/modules/sub_quadratic_attention.py +++ b/modules/sub_quadratic_attention.py @@ -1,7 +1,7 @@ # original source: # https://github.com/AminRezaei0x443/memory-efficient-attention/blob/1bc0d9e6ac5f82ea43a375135c4e1d3896ee1694/memory_efficient_attention/attention_torch.py # license: -# unspecified +# MIT License (see Memory Efficient Attention under the Licenses section in the web UI interface for the full license) # credit: # Amin Rezaei (original author) # Alex Birch (optimized algorithm for 3D tensors, at the expense of removing bias, masking and callbacks) -- cgit v1.2.3 From 82c1f10b144f733460feead0bdc37a861489dc57 Mon Sep 17 00:00:00 2001 From: Dean Hopkins Date: Fri, 6 Jan 2023 22:00:12 +0000 Subject: increase upscale api validation limit --- modules/api/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules') diff --git a/modules/api/models.py b/modules/api/models.py index f77951fc..22b88c59 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -125,7 +125,7 @@ class ExtrasBaseRequest(BaseModel): gfpgan_visibility: float = Field(default=0, title="GFPGAN Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of GFPGAN, values should be between 0 and 1.") codeformer_visibility: float = Field(default=0, title="CodeFormer Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of CodeFormer, values should be between 0 and 1.") codeformer_weight: float = Field(default=0, title="CodeFormer Weight", ge=0, le=1, allow_inf_nan=False, description="Sets the weight of CodeFormer, values should be between 0 and 1.") - upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=4, description="By how much to upscale the image, only used when resize_mode=0.") + upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=8, description="By how much to upscale the image, only used when resize_mode=0.") upscaling_resize_w: int = Field(default=512, title="Target Width", ge=1, description="Target width for the upscaler to hit. Only used when resize_mode=1.") upscaling_resize_h: int = Field(default=512, title="Target Height", ge=1, description="Target height for the upscaler to hit. Only used when resize_mode=1.") upscaling_crop: bool = Field(default=True, title="Crop to fit", description="Should the upscaler crop the image to fit in the choosen size?") -- cgit v1.2.3 From fdfce4711076c2ebac1089bac8169d043eb7978f Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 7 Jan 2023 13:29:47 +0300 Subject: add "from" resolution for hires fix to be less confusing. 
--- modules/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules') diff --git a/modules/ui.py b/modules/ui.py index 6c765262..99483130 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -267,7 +267,7 @@ def calc_resolution_hires(enable, width, height, hr_scale, hr_resize_x, hr_resiz with devices.autocast(): p.init([""], [0], [0]) - return f"resize to: {p.hr_upscale_to_x}x{p.hr_upscale_to_y}" + return f"resize: from {width}x{height} to {p.hr_upscale_to_x}x{p.hr_upscale_to_y}" def apply_styles(prompt, prompt_neg, style1_name, style2_name): -- cgit v1.2.3 From df3b31eb559ab9fabf7e513bdeddd5282c16f124 Mon Sep 17 00:00:00 2001 From: brkirch Date: Sat, 7 Jan 2023 07:04:59 -0500 Subject: In-place operations can break gradient calculation --- modules/sd_hijack_clip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules') diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py index 5520c9b2..852afc66 100644 --- a/modules/sd_hijack_clip.py +++ b/modules/sd_hijack_clip.py @@ -247,9 +247,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module): # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise batch_multipliers = torch.asarray(batch_multipliers).to(devices.device) original_mean = z.mean() - z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape) + z = z * batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape) new_mean = z.mean() - z *= original_mean / new_mean + z = z * (original_mean / new_mean) return z -- cgit v1.2.3 From d38ede71d5330958f4bbac5f99c1be3c146b506a Mon Sep 17 00:00:00 2001 From: noodleanon <122053346+noodleanon@users.noreply.github.com> Date: Sat, 7 Jan 2023 14:21:31 +0000 Subject: Added script support in txt2img endpoint --- modules/api/api.py | 22 +++++++++++++++++++--- modules/api/models.py | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'modules') diff --git a/modules/api/api.py b/modules/api/api.py index aa62a42e..0e8ea263 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -149,6 +149,14 @@ class Api: raise HTTPException(status_code=401, detail="Incorrect username or password", headers={"WWW-Authenticate": "Basic"}) def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI): + if txt2imgreq.script_name is not None: + if scripts.scripts_txt2img.scripts == []: + scripts.scripts_txt2img.initialize_scripts(True) + ui.create_ui() + + script_idx = script_name_to_index(txt2imgreq.script_name, scripts.scripts_txt2img.selectable_scripts) + script = scripts.scripts_txt2img.selectable_scripts[script_idx] + populate = txt2imgreq.copy(update={ # Override __init__ params "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index), "do_not_save_samples": True, @@ -158,11 +166,20 @@ class Api: if populate.sampler_name: populate.sampler_index = None # prevent a warning later on + args = vars(populate) + args.pop('script_name', None) + with self.queue_lock: - p = StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **vars(populate)) + p = StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **args) shared.state.begin() - processed = process_images(p) + if 'script' in locals(): + p.outpath_grids = opts.outdir_txt2img_grids + p.outpath_samples = opts.outdir_txt2img_samples + p.script_args = [script_idx + 1] + [None] * (script.args_from - 1) + p.script_args + processed = scripts.scripts_txt2img.run(p, *p.script_args) + else: + processed 
= process_images(p) shared.state.end() @@ -213,7 +230,6 @@ class Api: processed = scripts.scripts_img2img.run(p, *p.script_args) else: processed = process_images(p) - shared.state.end() b64images = list(map(encode_pil_to_base64, processed.images)) diff --git a/modules/api/models.py b/modules/api/models.py index c85eb94d..ce43c858 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -100,7 +100,7 @@ class PydanticModelGenerator: StableDiffusionTxt2ImgProcessingAPI = PydanticModelGenerator( "StableDiffusionProcessingTxt2Img", StableDiffusionProcessingTxt2Img, - [{"key": "sampler_index", "type": str, "default": "Euler"}] + [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "script_name", "type": str, "default": None}, {"key": "script_args", "type": list, "default": []}] ).generate_model() StableDiffusionImg2ImgProcessingAPI = PydanticModelGenerator( -- cgit v1.2.3 From a0c87f1fdf2b76b2ae4ef6c4b01ddaede3afab06 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 8 Jan 2023 08:52:26 +0300 Subject: skip images in embeddings dir if they have a second .preview extension --- modules/textual_inversion/textual_inversion.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'modules') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 45882ed6..e85dd549 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -109,6 +109,10 @@ class EmbeddingDatabase: ext = ext.upper() if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']: + _, second_ext = os.path.splitext(name) + if second_ext.upper() == '.PREVIEW': + return + embed_image = Image.open(path) if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text: data = embedding_from_b64(embed_image.text['sd-ti-embedding']) -- cgit v1.2.3 From 085427de0efc9e9e7a6e9a5aebc6b5a69f0365e7 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 8 Jan 2023 09:37:33 +0300 Subject: make it possible for extensions/scripts to add their own embedding directories --- modules/sd_hijack.py | 7 +- modules/textual_inversion/textual_inversion.py | 170 +++++++++++++++---------- 2 files changed, 108 insertions(+), 69 deletions(-) (limited to 'modules') diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index cfdb09d6..6b0d95af 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -83,10 +83,12 @@ class StableDiffusionModelHijack: clip = None optimization_method = None - embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir) + embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase() - def hijack(self, m): + def __init__(self): + self.embedding_db.add_embedding_dir(cmd_opts.embeddings_dir) + def hijack(self, m): if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation: model_embeddings = m.cond_stage_model.roberta.embeddings model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self) @@ -117,7 +119,6 @@ class StableDiffusionModelHijack: self.layers = flatten(m) def undo_hijack(self, m): - if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation: m.cond_stage_model = m.cond_stage_model.wrapped diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e85dd549..217fe9eb 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -66,17 
+66,41 @@ class Embedding: return self.cached_checksum +class DirWithTextualInversionEmbeddings: + def __init__(self, path): + self.path = path + self.mtime = None + + def has_changed(self): + if not os.path.isdir(self.path): + return False + + mt = os.path.getmtime(self.path) + if self.mtime is None or mt > self.mtime: + return True + + def update(self): + if not os.path.isdir(self.path): + return + + self.mtime = os.path.getmtime(self.path) + + class EmbeddingDatabase: - def __init__(self, embeddings_dir): + def __init__(self): self.ids_lookup = {} self.word_embeddings = {} self.skipped_embeddings = {} - self.dir_mtime = None - self.embeddings_dir = embeddings_dir self.expected_shape = -1 + self.embedding_dirs = {} - def register_embedding(self, embedding, model): + def add_embedding_dir(self, path): + self.embedding_dirs[path] = DirWithTextualInversionEmbeddings(path) + + def clear_embedding_dirs(self): + self.embedding_dirs.clear() + def register_embedding(self, embedding, model): self.word_embeddings[embedding.name] = embedding ids = model.cond_stage_model.tokenize([embedding.name])[0] @@ -93,69 +117,62 @@ class EmbeddingDatabase: vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1) return vec.shape[1] - def load_textual_inversion_embeddings(self, force_reload = False): - mt = os.path.getmtime(self.embeddings_dir) - if not force_reload and self.dir_mtime is not None and mt <= self.dir_mtime: - return + def load_from_file(self, path, filename): + name, ext = os.path.splitext(filename) + ext = ext.upper() - self.dir_mtime = mt - self.ids_lookup.clear() - self.word_embeddings.clear() - self.skipped_embeddings.clear() - self.expected_shape = self.get_expected_shape() - - def process_file(path, filename): - name, ext = os.path.splitext(filename) - ext = ext.upper() - - if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']: - _, second_ext = os.path.splitext(name) - if second_ext.upper() == '.PREVIEW': - return - - embed_image = Image.open(path) - if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text: - data = embedding_from_b64(embed_image.text['sd-ti-embedding']) - name = data.get('name', name) - else: - data = extract_image_data_embed(embed_image) - name = data.get('name', name) - elif ext in ['.BIN', '.PT']: - data = torch.load(path, map_location="cpu") - else: + if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']: + _, second_ext = os.path.splitext(name) + if second_ext.upper() == '.PREVIEW': return - # textual inversion embeddings - if 'string_to_param' in data: - param_dict = data['string_to_param'] - if hasattr(param_dict, '_parameters'): - param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11 - assert len(param_dict) == 1, 'embedding file has multiple terms in it' - emb = next(iter(param_dict.items()))[1] - # diffuser concepts - elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: - assert len(data.keys()) == 1, 'embedding file has multiple terms in it' - - emb = next(iter(data.values())) - if len(emb.shape) == 1: - emb = emb.unsqueeze(0) - else: - raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.") - - vec = emb.detach().to(devices.device, dtype=torch.float32) - embedding = Embedding(vec, name) - embedding.step = data.get('step', None) - embedding.sd_checkpoint = data.get('sd_checkpoint', None) - embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None) - embedding.vectors = vec.shape[0] - embedding.shape = 
vec.shape[-1] - - if self.expected_shape == -1 or self.expected_shape == embedding.shape: - self.register_embedding(embedding, shared.sd_model) + embed_image = Image.open(path) + if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text: + data = embedding_from_b64(embed_image.text['sd-ti-embedding']) + name = data.get('name', name) else: - self.skipped_embeddings[name] = embedding + data = extract_image_data_embed(embed_image) + name = data.get('name', name) + elif ext in ['.BIN', '.PT']: + data = torch.load(path, map_location="cpu") + else: + return + + # textual inversion embeddings + if 'string_to_param' in data: + param_dict = data['string_to_param'] + if hasattr(param_dict, '_parameters'): + param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11 + assert len(param_dict) == 1, 'embedding file has multiple terms in it' + emb = next(iter(param_dict.items()))[1] + # diffuser concepts + elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: + assert len(data.keys()) == 1, 'embedding file has multiple terms in it' + + emb = next(iter(data.values())) + if len(emb.shape) == 1: + emb = emb.unsqueeze(0) + else: + raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.") + + vec = emb.detach().to(devices.device, dtype=torch.float32) + embedding = Embedding(vec, name) + embedding.step = data.get('step', None) + embedding.sd_checkpoint = data.get('sd_checkpoint', None) + embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None) + embedding.vectors = vec.shape[0] + embedding.shape = vec.shape[-1] + + if self.expected_shape == -1 or self.expected_shape == embedding.shape: + self.register_embedding(embedding, shared.sd_model) + else: + self.skipped_embeddings[name] = embedding - for root, dirs, fns in os.walk(self.embeddings_dir): + def load_from_dir(self, embdir): + if not os.path.isdir(embdir.path): + return + + for root, dirs, fns in os.walk(embdir.path): for fn in fns: try: fullfn = os.path.join(root, fn) @@ -163,12 +180,32 @@ class EmbeddingDatabase: if os.stat(fullfn).st_size == 0: continue - process_file(fullfn, fn) + self.load_from_file(fullfn, fn) except Exception: print(f"Error loading embedding {fn}:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) continue + def load_textual_inversion_embeddings(self, force_reload=False): + if not force_reload: + need_reload = False + for path, embdir in self.embedding_dirs.items(): + if embdir.has_changed(): + need_reload = True + break + + if not need_reload: + return + + self.ids_lookup.clear() + self.word_embeddings.clear() + self.skipped_embeddings.clear() + self.expected_shape = self.get_expected_shape() + + for path, embdir in self.embedding_dirs.items(): + self.load_from_dir(embdir) + embdir.update() + print(f"Textual inversion embeddings loaded({len(self.word_embeddings)}): {', '.join(self.word_embeddings.keys())}") if len(self.skipped_embeddings) > 0: print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings.keys())}") @@ -251,14 +288,15 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat assert os.path.isfile(template_file), "Prompt template file doesn't exist" assert steps, "Max steps is empty or 0" assert isinstance(steps, int), "Max steps must be integer" - assert steps > 0 , "Max steps must be positive" + assert steps > 0, "Max steps must be positive" assert isinstance(save_model_every, 
int), "Save {name} must be integer" - assert save_model_every >= 0 , "Save {name} must be positive or 0" + assert save_model_every >= 0, "Save {name} must be positive or 0" assert isinstance(create_image_every, int), "Create image must be integer" - assert create_image_every >= 0 , "Create image must be positive or 0" + assert create_image_every >= 0, "Create image must be positive or 0" if save_model_every or create_image_every: assert log_directory, "Log directory is empty" + def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): save_embedding_every = save_embedding_every or 0 create_image_every = create_image_every or 0 -- cgit v1.2.3 From 6d0cc1e239e0a43a2e6d696eae20c66fad0819bb Mon Sep 17 00:00:00 2001 From: noodleanon <122053346+noodleanon@users.noreply.github.com> Date: Sun, 8 Jan 2023 11:03:48 +0000 Subject: Corrected is_img2img param --- modules/api/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules') diff --git a/modules/api/api.py b/modules/api/api.py index 0e8ea263..1785a6b4 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -151,7 +151,7 @@ class Api: def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI): if txt2imgreq.script_name is not None: if scripts.scripts_txt2img.scripts == []: - scripts.scripts_txt2img.initialize_scripts(True) + scripts.scripts_txt2img.initialize_scripts(False) ui.create_ui() script_idx = script_name_to_index(txt2imgreq.script_name, scripts.scripts_txt2img.selectable_scripts) -- cgit v1.2.3 From 137ce534b2355a527cd1a50c192909161258b442 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 8 Jan 2023 16:14:38 +0300 Subject: remove some code duplication remove calls to locals() add a test for img2img with script --- modules/api/api.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'modules') diff --git a/modules/api/api.py b/modules/api/api.py index 1785a6b4..5b6125f8 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -148,14 +148,20 @@ class Api: raise HTTPException(status_code=401, detail="Incorrect username or password", headers={"WWW-Authenticate": "Basic"}) - def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI): - if txt2imgreq.script_name is not None: - if scripts.scripts_txt2img.scripts == []: - scripts.scripts_txt2img.initialize_scripts(False) - ui.create_ui() + def get_script(self, script_name, script_runner): + if script_name is None: + return None, None + + if not script_runner.scripts: + script_runner.initialize_scripts(False) + ui.create_ui() + + script_idx = script_name_to_index(script_name, script_runner.selectable_scripts) + script = script_runner.selectable_scripts[script_idx] + return script, script_idx - script_idx = script_name_to_index(txt2imgreq.script_name, scripts.scripts_txt2img.selectable_scripts) - script = scripts.scripts_txt2img.selectable_scripts[script_idx] + def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI): + script, script_idx = self.get_script(txt2imgreq.script_name, scripts.scripts_txt2img) populate = 
txt2imgreq.copy(update={ # Override __init__ params "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index), @@ -173,7 +179,7 @@ class Api: p = StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **args) shared.state.begin() - if 'script' in locals(): + if script is not None: p.outpath_grids = opts.outdir_txt2img_grids p.outpath_samples = opts.outdir_txt2img_samples p.script_args = [script_idx + 1] + [None] * (script.args_from - 1) + p.script_args @@ -182,7 +188,6 @@ class Api: processed = process_images(p) shared.state.end() - b64images = list(map(encode_pil_to_base64, processed.images)) return TextToImageResponse(images=b64images, parameters=vars(txt2imgreq), info=processed.js()) @@ -192,13 +197,7 @@ class Api: if init_images is None: raise HTTPException(status_code=404, detail="Init image not found") - if img2imgreq.script_name is not None: - if scripts.scripts_img2img.scripts == []: - scripts.scripts_img2img.initialize_scripts(True) - ui.create_ui() - - script_idx = script_name_to_index(img2imgreq.script_name, scripts.scripts_img2img.selectable_scripts) - script = scripts.scripts_img2img.selectable_scripts[script_idx] + script, script_idx = self.get_script(img2imgreq.script_name, scripts.scripts_img2img) mask = img2imgreq.mask if mask: @@ -223,7 +222,7 @@ class Api: p.init_images = [decode_base64_to_image(x) for x in init_images] shared.state.begin() - if 'script' in locals(): + if script is not None: p.outpath_grids = opts.outdir_img2img_grids p.outpath_samples = opts.outdir_img2img_samples p.script_args = [script_idx + 1] + [None] * (script.args_from - 1) + p.script_args -- cgit v1.2.3
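For reference, a minimal client-side sketch of the script support these patches add to the img2img endpoint. It is illustrative only: the host/port and the /sdapi/v1/img2img route are the web UI defaults rather than anything defined in these diffs, and the file name, prompt, script name, and script_args values are placeholders — script_args must match the positional inputs of whichever installed script is selected (the name is matched case-insensitively against the script's title).

    import base64
    import requests

    # Placeholder init image, encoded the way the API expects init_images entries.
    with open("input.png", "rb") as f:
        init_image = base64.b64encode(f.read()).decode()

    payload = {
        "prompt": "a landscape",          # illustrative prompt
        "init_images": [init_image],
        "denoising_strength": 0.6,
        "script_name": "SD upscale",      # title of an installed img2img script (assumed)
        "script_args": [],                # script-specific positional args; placeholders only
    }

    # Default local web UI address; adjust to the actual deployment.
    resp = requests.post("http://127.0.0.1:7860/sdapi/v1/img2img", json=payload)
    resp.raise_for_status()
    images = resp.json()["images"]        # base64-encoded results, mirroring the txt2img response shown above

When script_name is set, the server resolves it with script_name_to_index, builds StableDiffusionProcessingImg2Img as before, and dispatches to scripts.scripts_img2img.run() with [script_idx + 1] prepended to script_args, as shown in the diffs above; after the later commits the same flow applies to txt2img.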
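The new sub-quadratic attention helper can also be exercised on its own. A minimal sketch, assuming it is run from the web UI root so that modules/ is importable; the tensor sizes are arbitrary and only follow the documented [batch * num_heads, tokens, channels_per_head] layout.

    import torch
    from modules.sub_quadratic_attention import efficient_dot_product_attention

    # Arbitrary example shapes: 8 batch*head slices, 4096 tokens, 40 channels per head.
    q = torch.randn(8, 4096, 40)
    k = torch.randn(8, 4096, 40)
    v = torch.randn(8, 4096, 40)

    # kv_chunk_size=None falls back to sqrt(k_tokens) inside the function;
    # the docstring recommends use_checkpoint=False for inference.
    out = efficient_dot_product_attention(
        q, k, v,
        query_chunk_size=1024,
        kv_chunk_size=None,
        kv_chunk_size_min=None,
        use_checkpoint=False,
    )
    print(out.shape)  # torch.Size([8, 4096, 40])

Inside the web UI the same path is enabled with --opt-sub-quad-attention and tuned with --sub-quad-q-chunk-size, --sub-quad-kv-chunk-size, and --sub-quad-chunk-threshold (a byte threshold in the first commit, reinterpreted as a percentage of available VRAM in the follow-up commit).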