From 49a55b410b66b7dd9be9335d8a2e3a71e4f8b15c Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Thu, 11 May 2023 18:28:15 +0300 Subject: Autofix Ruff W (not W605) (mostly whitespace) --- modules/script_callbacks.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'modules/script_callbacks.py') diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index 17109732..7d9dd736 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -32,22 +32,22 @@ class CFGDenoiserParams: def __init__(self, x, image_cond, sigma, sampling_step, total_sampling_steps, text_cond, text_uncond): self.x = x """Latent image representation in the process of being denoised""" - + self.image_cond = image_cond """Conditioning image""" - + self.sigma = sigma """Current sigma noise step value""" - + self.sampling_step = sampling_step """Current Sampling step number""" - + self.total_sampling_steps = total_sampling_steps """Total number of sampling steps planned""" - + self.text_cond = text_cond """ Encoder hidden states of text conditioning from prompt""" - + self.text_uncond = text_uncond """ Encoder hidden states of text conditioning from negative prompt""" @@ -240,7 +240,7 @@ def add_callback(callbacks, fun): callbacks.append(ScriptCallback(filename, fun)) - + def remove_current_script_callbacks(): stack = [x for x in inspect.stack() if x.filename != __file__] filename = stack[0].filename if len(stack) > 0 else 'unknown file' -- cgit v1.2.3 From 3078001439d25b66ef5627c9e3d431aa23bbed73 Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Sun, 14 May 2023 01:49:41 +0000 Subject: Add/modify CFG callbacks Required by self-attn guidance extension https://github.com/ashen-sensored/sd_webui_SAG --- modules/script_callbacks.py | 35 +++++++++++++++++++++++++++++++++++ modules/sd_samplers_kdiffusion.py | 8 +++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'modules/script_callbacks.py') diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index 7d9dd736..e83c6ecf 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -53,6 +53,21 @@ class CFGDenoiserParams: class CFGDenoisedParams: + def __init__(self, x, sampling_step, total_sampling_steps, inner_model): + self.x = x + """Latent image representation in the process of being denoised""" + + self.sampling_step = sampling_step + """Current Sampling step number""" + + self.total_sampling_steps = total_sampling_steps + """Total number of sampling steps planned""" + + self.inner_model = inner_model + """Inner model reference that is being used for denoising""" + + +class AfterCFGCallbackParams: def __init__(self, x, sampling_step, total_sampling_steps): self.x = x """Latent image representation in the process of being denoised""" @@ -63,6 +78,9 @@ class CFGDenoisedParams: self.total_sampling_steps = total_sampling_steps """Total number of sampling steps planned""" + self.output_altered = False + """A flag for CFGDenoiser that indicates whether the output has been altered by the callback""" + class UiTrainTabParams: def __init__(self, txt2img_preview_params): @@ -87,6 +105,7 @@ callback_map = dict( callbacks_image_saved=[], callbacks_cfg_denoiser=[], callbacks_cfg_denoised=[], + callbacks_cfg_after_cfg=[], callbacks_before_component=[], callbacks_after_component=[], callbacks_image_grid=[], @@ -186,6 +205,14 @@ def cfg_denoised_callback(params: CFGDenoisedParams): report_exception(c, 'cfg_denoised_callback') +def 
cfg_after_cfg_callback(params: AfterCFGCallbackParams): + for c in callback_map['callbacks_cfg_after_cfg']: + try: + c.callback(params) + except Exception: + report_exception(c, 'cfg_after_cfg_callback') + + def before_component_callback(component, **kwargs): for c in callback_map['callbacks_before_component']: try: @@ -332,6 +359,14 @@ def on_cfg_denoised(callback): add_callback(callback_map['callbacks_cfg_denoised'], callback) +def on_cfg_after_cfg(callback): + """register a function to be called in the kdiffussion cfg_denoiser method after cfg calculations has completed. + The callback is called with one argument: + - params: CFGDenoisedParams - parameters to be passed to the inner model and sampling state details. + """ + add_callback(callback_map['callbacks_cfg_after_cfg'], callback) + + def on_before_component(callback): """register a function to be called before a component is created. The callback is called with arguments: diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index e9e41818..55f0d3a3 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -8,6 +8,7 @@ from modules.shared import opts, state import modules.shared as shared from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback +from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback samplers_k_diffusion = [ ('Euler a', 'sample_euler_ancestral', ['k_euler_a', 'k_euler_ancestral'], {}), @@ -160,7 +161,7 @@ class CFGDenoiser(torch.nn.Module): fake_uncond = torch.cat([x_out[i:i+1] for i in denoised_image_indexes]) x_out = torch.cat([x_out, fake_uncond]) # we skipped uncond denoising, so we put cond-denoised image to where the uncond-denoised image should be - denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps) + denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model) cfg_denoised_callback(denoised_params) devices.test_for_nans(x_out, "unet") @@ -180,6 +181,11 @@ class CFGDenoiser(torch.nn.Module): if self.mask is not None: denoised = self.init_latent * self.mask + self.nmask * denoised + after_cfg_callback_params = AfterCFGCallbackParams(denoised, state.sampling_step, state.sampling_steps) + cfg_after_cfg_callback(after_cfg_callback_params) + if after_cfg_callback_params.output_altered: + denoised = after_cfg_callback_params.x + self.step += 1 return denoised -- cgit v1.2.3 From 8abfc95013d247c8a863d048574bc1f9d1eb0443 Mon Sep 17 00:00:00 2001 From: Sakura-Luna <53183413+Sakura-Luna@users.noreply.github.com> Date: Sun, 14 May 2023 12:56:34 +0800 Subject: Update script_callbacks.py --- modules/script_callbacks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'modules/script_callbacks.py') diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index e83c6ecf..57dfd457 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -64,7 +64,7 @@ class CFGDenoisedParams: """Total number of sampling steps planned""" self.inner_model = inner_model - """Inner model reference that is being used for denoising""" + """Inner model reference used for denoising""" class AfterCFGCallbackParams: @@ -79,7 +79,7 @@ class AfterCFGCallbackParams: """Total number of sampling steps planned""" self.output_altered = False - """A flag for CFGDenoiser that indicates whether the output has been altered by the callback""" 
+ """A flag for CFGDenoiser indicating whether the output has been altered by the callback""" class UiTrainTabParams: @@ -360,9 +360,9 @@ def on_cfg_denoised(callback): def on_cfg_after_cfg(callback): - """register a function to be called in the kdiffussion cfg_denoiser method after cfg calculations has completed. + """register a function to be called in the kdiffussion cfg_denoiser method after cfg calculations are completed. The callback is called with one argument: - - params: CFGDenoisedParams - parameters to be passed to the inner model and sampling state details. + - params: AfterCFGCallbackParams - parameters to be passed to the script for post-processing after cfg calculation. """ add_callback(callback_map['callbacks_cfg_after_cfg'], callback) -- cgit v1.2.3 From 005849331e82cded96f6f3e5ff828037c672c38d Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 14 May 2023 08:15:22 +0300 Subject: remove output_altered flag from AfterCFGCallbackParams --- modules/script_callbacks.py | 3 --- modules/sd_samplers_kdiffusion.py | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'modules/script_callbacks.py') diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index 57dfd457..3c21a362 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -78,9 +78,6 @@ class AfterCFGCallbackParams: self.total_sampling_steps = total_sampling_steps """Total number of sampling steps planned""" - self.output_altered = False - """A flag for CFGDenoiser indicating whether the output has been altered by the callback""" - class UiTrainTabParams: def __init__(self, txt2img_preview_params): diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 55f0d3a3..61f23ad7 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -183,8 +183,7 @@ class CFGDenoiser(torch.nn.Module): after_cfg_callback_params = AfterCFGCallbackParams(denoised, state.sampling_step, state.sampling_steps) cfg_after_cfg_callback(after_cfg_callback_params) - if after_cfg_callback_params.output_altered: - denoised = after_cfg_callback_params.x + denoised = after_cfg_callback_params.x self.step += 1 return denoised -- cgit v1.2.3 From 2582a0fd3b3e91c5fba9e5e561cbdf5fee835063 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Thu, 18 May 2023 22:48:28 +0300 Subject: make it possible for scripts to add cross attention optimizations add UI selection for cross attention optimization --- modules/cmd_args.py | 14 ++-- modules/script_callbacks.py | 21 ++++++ modules/sd_hijack.py | 90 ++++++++++++++----------- modules/sd_hijack_optimizations.py | 135 ++++++++++++++++++++++++++++++++++++- modules/shared.py | 1 + modules/shared_items.py | 8 +++ webui.py | 10 +++ 7 files changed, 228 insertions(+), 51 deletions(-) (limited to 'modules/script_callbacks.py') diff --git a/modules/cmd_args.py b/modules/cmd_args.py index 7bde161e..85db93f3 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -53,16 +53,16 @@ parser.add_argument("--xformers", action='store_true', help="enable xformers for parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work") parser.add_argument("--xformers-flash-attention", action='store_true', help="enable xformers with Flash Attention to improve reproducibility (supported for SD2.x or variant only)") 
parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything") -parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.") -parser.add_argument("--opt-sub-quad-attention", action='store_true', help="enable memory efficient sub-quadratic cross-attention layer optimization") +parser.add_argument("--opt-split-attention", action='store_true', help="prefer Doggettx's cross-attention layer optimization for automatic choice of optimization") +parser.add_argument("--opt-sub-quad-attention", action='store_true', help="prefer memory efficient sub-quadratic cross-attention layer optimization for automatic choice of optimization") parser.add_argument("--sub-quad-q-chunk-size", type=int, help="query chunk size for the sub-quadratic cross-attention layer optimization to use", default=1024) parser.add_argument("--sub-quad-kv-chunk-size", type=int, help="kv chunk size for the sub-quadratic cross-attention layer optimization to use", default=None) parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the percentage of VRAM threshold for the sub-quadratic cross-attention layer optimization to use chunking", default=None) -parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.") -parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") -parser.add_argument("--opt-sdp-attention", action='store_true', help="enable scaled dot product cross-attention layer optimization; requires PyTorch 2.*") -parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="enable scaled dot product cross-attention layer optimization without memory efficient attention, makes image generation deterministic; requires PyTorch 2.*") -parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") +parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="prefer InvokeAI's cross-attention layer optimization for automatic choice of optimization") +parser.add_argument("--opt-split-attention-v1", action='store_true', help="prefer older version of split attention optimization for automatic choice of optimization") +parser.add_argument("--opt-sdp-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization for automatic choice of optimization; requires PyTorch 2.*") +parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization without memory efficient attention for automatic choice of optimization, makes image generation deterministic; requires PyTorch 2.*") +parser.add_argument("--disable-opt-split-attention", action='store_true', help="does not do anything") parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI") parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower) parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") diff --git 
a/modules/script_callbacks.py b/modules/script_callbacks.py index 3c21a362..40f388a5 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -110,6 +110,7 @@ callback_map = dict( callbacks_script_unloaded=[], callbacks_before_ui=[], callbacks_on_reload=[], + callbacks_list_optimizers=[], ) @@ -258,6 +259,18 @@ def before_ui_callback(): report_exception(c, 'before_ui') +def list_optimizers_callback(): + res = [] + + for c in callback_map['callbacks_list_optimizers']: + try: + c.callback(res) + except Exception: + report_exception(c, 'list_optimizers') + + return res + + def add_callback(callbacks, fun): stack = [x for x in inspect.stack() if x.filename != __file__] filename = stack[0].filename if len(stack) > 0 else 'unknown file' @@ -409,3 +422,11 @@ def on_before_ui(callback): """register a function to be called before the UI is created.""" add_callback(callback_map['callbacks_before_ui'], callback) + + +def on_list_optimizers(callback): + """register a function to be called when UI is making a list of cross attention optimization options. + The function will be called with one argument, a list, and shall add objects of type modules.sd_hijack_optimizations.SdOptimization + to it.""" + + add_callback(callback_map['callbacks_list_optimizers'], callback) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 14e7f799..39193be8 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -3,8 +3,9 @@ from torch.nn.functional import silu from types import MethodType import modules.textual_inversion.textual_inversion -from modules import devices, sd_hijack_optimizations, shared +from modules import devices, sd_hijack_optimizations, shared, script_callbacks, errors from modules.hypernetworks import hypernetwork +from modules.sd_hijack_optimizations import diffusionmodules_model_AttnBlock_forward from modules.shared import cmd_opts from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_unet, sd_hijack_xlmr, xlmr @@ -28,57 +29,56 @@ ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] ldm.modules.attention.print = lambda *args: None ldm.modules.diffusionmodules.model.print = lambda *args: None +optimizers = [] +current_optimizer: sd_hijack_optimizations.SdOptimization = None + + +def list_optimizers(): + new_optimizers = script_callbacks.list_optimizers_callback() + + new_optimizers = [x for x in new_optimizers if x.is_available()] + + new_optimizers = sorted(new_optimizers, key=lambda x: x.priority(), reverse=True) + + optimizers.clear() + optimizers.extend(new_optimizers) + def apply_optimizations(): + global current_optimizer + undo_optimizations() ldm.modules.diffusionmodules.model.nonlinearity = silu ldm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th - optimization_method = None + if current_optimizer is not None: + current_optimizer.undo() + current_optimizer = None + + selection = shared.opts.cross_attention_optimization + if selection == "Automatic" and len(optimizers) > 0: + matching_optimizer = next(iter([x for x in optimizers if x.cmd_opt and getattr(shared.cmd_opts, x.cmd_opt, False)]), optimizers[0]) + else: + matching_optimizer = next(iter([x for x in optimizers if x.title() == selection]), None) - can_use_sdp = hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(torch.nn.functional.scaled_dot_product_attention) # not everyone has torch 2.x to use sdp - - if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= 
torch.cuda.get_device_capability(shared.device) <= (9, 0)): - print("Applying xformers cross attention optimization.") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward - ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward - optimization_method = 'xformers' - elif cmd_opts.opt_sdp_no_mem_attention and can_use_sdp: - print("Applying scaled dot product cross attention optimization (without memory efficient attention).") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.scaled_dot_product_no_mem_attention_forward - ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.sdp_no_mem_attnblock_forward - optimization_method = 'sdp-no-mem' - elif cmd_opts.opt_sdp_attention and can_use_sdp: - print("Applying scaled dot product cross attention optimization.") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.scaled_dot_product_attention_forward - ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.sdp_attnblock_forward - optimization_method = 'sdp' - elif cmd_opts.opt_sub_quad_attention: - print("Applying sub-quadratic cross attention optimization.") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.sub_quad_attention_forward - ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.sub_quad_attnblock_forward - optimization_method = 'sub-quadratic' - elif cmd_opts.opt_split_attention_v1: - print("Applying v1 cross attention optimization.") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 - optimization_method = 'V1' - elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not cmd_opts.opt_split_attention and not torch.cuda.is_available()): - print("Applying cross attention optimization (InvokeAI).") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI - optimization_method = 'InvokeAI' - elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()): - print("Applying cross attention optimization (Doggettx).") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward - ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.cross_attention_attnblock_forward - optimization_method = 'Doggettx' - - return optimization_method + if selection == "None": + matching_optimizer = None + elif matching_optimizer is None: + matching_optimizer = optimizers[0] + + if matching_optimizer is not None: + print(f"Applying optimization: {matching_optimizer.name}") + matching_optimizer.apply() + current_optimizer = matching_optimizer + return current_optimizer.name + else: + return '' def undo_optimizations(): - ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward ldm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity + ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward @@ -169,7 +169,11 @@ class StableDiffusionModelHijack: if m.cond_stage_key == "edit": sd_hijack_unet.hijack_ddpm_edit() - self.optimization_method = apply_optimizations() + try: + self.optimization_method = apply_optimizations() + except 
Exception as e: + errors.display(e, "applying cross attention optimization") + undo_optimizations() self.clip = m.cond_stage_model @@ -223,6 +227,10 @@ class StableDiffusionModelHijack: return token_count, self.clip.get_target_prompt_token_count(token_count) + def redo_hijack(self, m): + self.undo_hijack(m) + self.hijack(m) + class EmbeddingsWithFixes(torch.nn.Module): def __init__(self, wrapped, embeddings): diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index f00fe55c..1c5b709b 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -9,10 +9,139 @@ from torch import einsum from ldm.util import default from einops import rearrange -from modules import shared, errors, devices +from modules import shared, errors, devices, sub_quadratic_attention, script_callbacks from modules.hypernetworks import hypernetwork -from .sub_quadratic_attention import efficient_dot_product_attention +import ldm.modules.attention +import ldm.modules.diffusionmodules.model + +diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward + + +class SdOptimization: + def __init__(self, name, label=None, cmd_opt=None): + self.name = name + self.label = label + self.cmd_opt = cmd_opt + + def title(self): + if self.label is None: + return self.name + + return f"{self.name} - {self.label}" + + def is_available(self): + return True + + def priority(self): + return 0 + + def apply(self): + pass + + def undo(self): + ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward + + +class SdOptimizationXformers(SdOptimization): + def __init__(self): + super().__init__("xformers", cmd_opt="xformers") + + def is_available(self): + return shared.cmd_opts.force_enable_xformers or (shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)) + + def priority(self): + return 100 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = xformers_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = xformers_attnblock_forward + + +class SdOptimizationSdpNoMem(SdOptimization): + def __init__(self, name="sdp-no-mem", label="scaled dot product without memory efficient attention", cmd_opt="opt_sdp_no_mem_attention"): + super().__init__(name, label, cmd_opt) + + def is_available(self): + return hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(torch.nn.functional.scaled_dot_product_attention) + + def priority(self): + return 90 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = scaled_dot_product_no_mem_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = sdp_no_mem_attnblock_forward + + +class SdOptimizationSdp(SdOptimizationSdpNoMem): + def __init__(self): + super().__init__("sdp", "scaled dot product", cmd_opt="opt_sdp_attention") + + def priority(self): + return 80 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = sdp_attnblock_forward + + +class SdOptimizationSubQuad(SdOptimization): + def __init__(self): + super().__init__("sub-quadratic", cmd_opt="opt_sub_quad_attention") + + def priority(self): + return 10 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = sub_quad_attention_forward + 
ldm.modules.diffusionmodules.model.AttnBlock.forward = sub_quad_attnblock_forward + + +class SdOptimizationV1(SdOptimization): + def __init__(self): + super().__init__("V1", "original v1", cmd_opt="opt_split_attention_v1") + + def priority(self): + return 10 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_v1 + + +class SdOptimizationInvokeAI(SdOptimization): + def __init__(self): + super().__init__("InvokeAI", cmd_opt="opt_split_attention_invokeai") + + def priority(self): + return 1000 if not torch.cuda.is_available() else 10 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward_invokeAI + + +class SdOptimizationDoggettx(SdOptimization): + def __init__(self): + super().__init__("Doggettx", cmd_opt="opt_split_attention") + + def priority(self): + return 20 + + def apply(self): + ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward + ldm.modules.diffusionmodules.model.AttnBlock.forward = cross_attention_attnblock_forward + + +def list_optimizers(res): + res.extend([ + SdOptimizationXformers(), + SdOptimizationSdpNoMem(), + SdOptimizationSdp(), + SdOptimizationSubQuad(), + SdOptimizationV1(), + SdOptimizationInvokeAI(), + SdOptimizationDoggettx(), + ]) if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers: @@ -299,7 +428,7 @@ def sub_quad_attention(q, k, v, q_chunk_size=1024, kv_chunk_size=None, kv_chunk_ kv_chunk_size = k_tokens with devices.without_autocast(disable=q.dtype == v.dtype): - return efficient_dot_product_attention( + return sub_quadratic_attention.efficient_dot_product_attention( q, k, v, diff --git a/modules/shared.py b/modules/shared.py index fdbab5c4..7cfbaa0c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -417,6 +417,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { })) options_templates.update(options_section(('optimizations', "Optimizations"), { + "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}), "s_min_uncond": OptionInfo(0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 4.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"), "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"), "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"), diff --git a/modules/shared_items.py b/modules/shared_items.py index e792a134..2a8713c8 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -21,3 +21,11 @@ def refresh_vae_list(): import modules.sd_vae modules.sd_vae.refresh_vae_list() + + +def cross_attention_optimizations(): + import modules.sd_hijack + + return ["Automatic"] + [x.title() for x in modules.sd_hijack.optimizers] + ["None"] + + diff --git a/webui.py b/webui.py index b4a21e73..afe3c5fa 100644 --- a/webui.py +++ b/webui.py @@ -52,6 +52,7 @@ import modules.img2img import modules.lowvram import modules.scripts import modules.sd_hijack +import modules.sd_hijack_optimizations import 
modules.sd_models import modules.sd_vae import modules.txt2img @@ -200,6 +201,10 @@ def initialize(): modules.textual_inversion.textual_inversion.list_textual_inversion_templates() startup_timer.record("refresh textual inversion templates") + modules.script_callbacks.on_list_optimizers(modules.sd_hijack_optimizations.list_optimizers) + modules.sd_hijack.list_optimizers() + startup_timer.record("scripts list_optimizers") + # load model in parallel to other startup stuff Thread(target=lambda: shared.sd_model).start() @@ -208,6 +213,7 @@ def initialize(): shared.opts.onchange("sd_vae_as_default", wrap_queued_call(lambda: modules.sd_vae.reload_vae_weights()), call=False) shared.opts.onchange("temp_dir", ui_tempdir.on_tmpdir_changed) shared.opts.onchange("gradio_theme", shared.reload_gradio_theme) + shared.opts.onchange("cross_attention_optimization", wrap_queued_call(lambda: modules.sd_hijack.model_hijack.redo_hijack(shared.sd_model)), call=False) startup_timer.record("opts onchange") shared.reload_hypernetworks() @@ -428,6 +434,10 @@ def webui(): extra_networks.register_extra_network(extra_networks_hypernet.ExtraNetworkHypernet()) startup_timer.record("initialize extra networks") + modules.script_callbacks.on_list_optimizers(modules.sd_hijack_optimizations.list_optimizers) + modules.sd_hijack.list_optimizers() + startup_timer.record("scripts list_optimizers") + if __name__ == "__main__": if cmd_opts.nowebui: -- cgit v1.2.3
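Taken together, the CFG callback patches above give extension scripts two hooks around the CFG step: cfg_denoised callbacks now receive the inner model reference, and the new cfg_after_cfg callback fires once CFG has been combined, with whatever is left in params.x used as the sampler's denoised result (after the output_altered flag was removed, the reassignment is unconditional). A minimal sketch of an extension script wired against this API — the function names and the clamp placeholder are illustrative, not part of the patches:

    import torch

    from modules import script_callbacks
    from modules.script_callbacks import CFGDenoisedParams, AfterCFGCallbackParams


    def log_denoised(params: CFGDenoisedParams):
        # params.inner_model is the field added for the SAG extension: a reference
        # to the wrapped inner model that is doing the denoising.
        print(f"step {params.sampling_step + 1}/{params.total_sampling_steps}: {type(params.inner_model).__name__}")


    def adjust_after_cfg(params: AfterCFGCallbackParams):
        # Whatever ends up in params.x becomes the denoised latent the sampler
        # continues with; this clamp is only a placeholder post-processing step.
        params.x = torch.clamp(params.x, -10.0, 10.0)


    script_callbacks.on_cfg_denoised(log_denoised)
    script_callbacks.on_cfg_after_cfg(adjust_after_cfg)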
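The cross attention patch replaces the old if/elif chain in sd_hijack.apply_optimizations with a list of SdOptimization objects collected through the new on_list_optimizers callback, sorted by priority() and exposed in the UI via shared_items.cross_attention_optimizations(). A sketch of how a script could register its own optimization against this API — the class name, priority value and replacement forward function are hypothetical:

    import ldm.modules.attention

    from modules import script_callbacks
    from modules.sd_hijack_optimizations import SdOptimization


    def my_cross_attention_forward(self, x, context=None, mask=None):
        ...  # hypothetical replacement for CrossAttention.forward


    class MyOptimization(SdOptimization):
        def __init__(self):
            # title() -> "my-optimization - example from an extension" is what shows
            # up in the new "Cross attention optimization" dropdown.
            super().__init__("my-optimization", label="example from an extension")

        def priority(self):
            return 5  # below every built-in, so "Automatic" will not pick it

        def apply(self):
            ldm.modules.attention.CrossAttention.forward = my_cross_attention_forward


    def list_my_optimizers(res):
        res.append(MyOptimization())


    script_callbacks.on_list_optimizers(list_my_optimizers)

undo() is inherited from SdOptimization and restores the default CrossAttention and AttnBlock forwards, so apply() only needs to install the replacements; sd_hijack calls undo() on the previous optimizer when the "cross_attention_optimization" setting changes and redo_hijack runs.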