From cd48308a2a37b1e838b1b0cc5e8e507a174b14fb Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 28 Aug 2023 22:22:35 +0300 Subject: always show NV as RNG source in infotext --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 7dc931ba..0138e5ac 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -689,7 +689,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio, "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, "Init image hash": getattr(p, 'init_img_hash', None), - "RNG": opts.randn_source if opts.randn_source != "GPU" and opts.randn_source != "NV" else None, + "RNG": opts.randn_source if opts.randn_source != "GPU" else None, "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond, "Tiling": "True" if p.tiling else None, **p.extra_generation_params, -- cgit v1.2.3 From ae0b2cc1964486ba847290ad752d9a284b6d63ba Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Wed, 30 Aug 2023 18:22:50 +0300 Subject: add an option to choose how to combine hires fix and refiner --- modules/processing.py | 18 ++++++------------ modules/sd_samplers_common.py | 13 +++++++++++-- modules/shared_options.py | 1 + 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 0138e5ac..f696e925 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1148,18 +1148,12 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): else: decoded_samples = None - current = shared.sd_model.sd_checkpoint_info - try: - if self.hr_checkpoint_info is not None: - self.sampler = None - 
sd_models.reload_model_weights(info=self.hr_checkpoint_info) - devices.torch_gc() - - return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts) - finally: - self.sampler = None - sd_models.reload_model_weights(info=current) - devices.torch_gc() + with sd_models.SkipWritingToConfig(): + sd_models.reload_model_weights(info=self.hr_checkpoint_info) + + devices.torch_gc() + + return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts) def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts): if shared.state.interrupted: diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 60fa161c..6c935a38 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -164,8 +164,17 @@ def apply_refiner(cfg_denoiser): if refiner_checkpoint_info is None or shared.sd_model.sd_checkpoint_info == refiner_checkpoint_info: return False - if getattr(cfg_denoiser.p, "enable_hr", False) and not cfg_denoiser.p.is_hr_pass: - return False + if getattr(cfg_denoiser.p, "enable_hr", False): + is_second_pass = cfg_denoiser.p.is_hr_pass + + if opts.hires_fix_refiner_pass == "first pass" and is_second_pass: + return False + + if opts.hires_fix_refiner_pass == "second pass" and not is_second_pass: + return False + + if opts.hires_fix_refiner_pass != "second pass": + cfg_denoiser.p.extra_generation_params['Hires refiner'] = opts.hires_fix_refiner_pass cfg_denoiser.p.extra_generation_params['Refiner'] = refiner_checkpoint_info.short_title cfg_denoiser.p.extra_generation_params['Refiner switch at'] = refiner_switch_at diff --git a/modules/shared_options.py b/modules/shared_options.py index 78652ea2..00b273fa 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -146,6 +146,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"), 
"randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"), "tiling": OptionInfo(False, "Tiling", infotext='Tiling').info("produce a tileable picture"), + "hires_fix_refiner_pass": OptionInfo("second pass", "Hires fix: which pass to enable refiner for", gr.Radio, {"choices": ["first pass", "second pass", "both passes"]}, infotext="Hires refiner"), })) options_templates.update(options_section(('sdxl', "Stable Diffusion XL"), { -- cgit v1.2.3 From 6adf2b71c2c89f84d4aee1e230276dcd1a3fab62 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Wed, 30 Aug 2023 19:08:04 +0300 Subject: fix inpainting models in txt2img creating black pictures --- modules/processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index f696e925..e08b6305 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -91,8 +91,8 @@ def create_binary_mask(image): def txt2img_image_conditioning(sd_model, x, width, height): if sd_model.model.conditioning_key in {'hybrid', 'concat'}: # Inpainting models - # The "masked-image" in this case will just be all zeros since the entire image is masked. - image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device) + # The "masked-image" in this case will just be all 0.5 since the entire image is masked. + image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method)) # Add the fake full 1s mask to the first dimension. 
-- cgit v1.2.3 From f11eec81e31bfc9195bbacda13b2a3ce7b98fd92 Mon Sep 17 00:00:00 2001 From: ibrainventures Date: Thu, 7 Sep 2023 23:19:52 +0200 Subject: (feat) Include Program Version in info response. Update processing.py This would help to organize / memorize the program version for the creation process. (as it is also unformated included inside the infotext). --- modules/processing.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index e124e7f0..0c191428 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -533,6 +533,7 @@ class Processed: self.all_seeds = all_seeds or p.all_seeds or [self.seed] self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed] self.infotexts = infotexts or [info] + self.version = program_version() def js(self): obj = { @@ -567,6 +568,7 @@ class Processed: "job_timestamp": self.job_timestamp, "clip_skip": self.clip_skip, "is_using_inpainting_conditioning": self.is_using_inpainting_conditioning, + "version": self.version, } return json.dumps(obj) -- cgit v1.2.3 From ab5741717546758c57cf6c2a040645ec2b44690a Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 9 Sep 2023 22:35:50 +0900 Subject: prevent accessing non-existing keys --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 0c191428..618f8abe 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -711,7 +711,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None: p.scripts.before_process(p) - stored_opts = {k: opts.data[k] for k in p.override_settings.keys()} + stored_opts = {k: opts.data[k] for k in p.override_settings.keys() if k in opts.data} try: # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts 
checkpoint -- cgit v1.2.3 From 3f763d41e8ff7f09f89adb00eec440f18566d260 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Sun, 1 Oct 2023 22:38:27 -0600 Subject: Change denoising_strength default to None. --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index e124e7f0..061d9955 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -142,7 +142,7 @@ class StableDiffusionProcessing: overlay_images: list = None eta: float = None do_not_reload_embeddings: bool = False - denoising_strength: float = 0 + denoising_strength: float = None ddim_discretize: str = None s_min_uncond: float = None s_churn: float = None -- cgit v1.2.3 From 9821625a76177ebc8b62a1ee6d8ef39cf4805f99 Mon Sep 17 00:00:00 2001 From: Leon Date: Mon, 9 Oct 2023 18:36:48 +0800 Subject: fix the key error exception when adding an overwriting key which is defined in the extensions --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 36bc94f7..fee2440f 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -711,7 +711,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None: p.scripts.before_process(p) - stored_opts = {k: opts.data[k] for k in p.override_settings.keys() if k in opts.data} + stored_opts = {k: opts.data[k] if k in opts.data else opts.get_default(k) for k in p.override_settings.keys() if k in opts.data} try: # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint -- cgit v1.2.3 From fbc8d213546047d8970b92809e15b33e8a1301be Mon Sep 17 00:00:00 2001 From: Won-Kyu Park Date: Sat, 14 Oct 2023 02:39:04 +0900 Subject: fix IndexError: list index out of range error interrupted while postprocess --- modules/processing.py | 1 + 1 file 
changed, 1 insertion(+) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 36bc94f7..df037fb0 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -820,6 +820,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: state.skipped = False if state.interrupted: + infotexts.append(Processed(p, []).infotext(p, 0)) break sd_models.reload_model_weights() # model can be changed for example by refiner -- cgit v1.2.3 From a109c7aeb8871fe0ae201794f140f8f2e9b5c3ac Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 14 Oct 2023 07:49:03 +0300 Subject: more general case of adding an infotext when no images have been generated --- modules/processing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index df037fb0..816f5fc7 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -820,7 +820,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: state.skipped = False if state.interrupted: - infotexts.append(Processed(p, []).infotext(p, 0)) break sd_models.reload_model_weights() # model can be changed for example by refiner @@ -961,6 +960,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: state.nextjob() + if not infotexts: + infotexts.append(Processed(p, []).infotext(p, 0)) + p.color_corrections = None index_of_first_image = 0 -- cgit v1.2.3 From 5121846d34d74aee9b55d48d35c1559a710051b0 Mon Sep 17 00:00:00 2001 From: Won-Kyu Park Date: Wed, 25 Oct 2023 21:37:55 +0900 Subject: call state.jobnext() before postproces*() --- modules/processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 40598f5c..70ad1ebe 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -886,6 +886,8 @@ def 
process_images_inner(p: StableDiffusionProcessing) -> Processed: devices.torch_gc() + state.nextjob() + if p.scripts is not None: p.scripts.postprocess_batch(p, x_samples_ddim, batch_number=n) @@ -958,8 +960,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: devices.torch_gc() - state.nextjob() - if not infotexts: infotexts.append(Processed(p, []).infotext(p, 0)) -- cgit v1.2.3 From 5e80d9ee99c5899e5e2b130408ffb65a0585a62a Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Tue, 7 Nov 2023 11:33:16 +0300 Subject: fix pix2pix producing bad results --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 70ad1ebe..b0e240a4 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -296,7 +296,7 @@ class StableDiffusionProcessing: return conditioning def edit_image_conditioning(self, source_image): - conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) + conditioning_image = shared.sd_model.encode_first_stage(source_image).mode() return conditioning_image -- cgit v1.2.3 From 294f8a514f982248cda1cafda30d35566f3a0321 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Sat, 11 Nov 2023 23:28:12 +0900 Subject: add hyperTile https://github.com/tfernd/HyperTile --- modules/processing.py | 27 ++++++++++++++++++++++++--- modules/shared_options.py | 2 ++ 2 files changed, 26 insertions(+), 3 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index b0e240a4..e2309534 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -799,6 +799,16 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: infotexts = [] output_images = [] + unet_object = p.sd_model.model + vae_model = p.sd_model.first_stage_model + try: + from 
hyper_tile import split_attention, flush + except (ImportError, ModuleNotFoundError): # pip install git+https://github.com/tfernd/HyperTile@2ef64b2800d007d305755c33550537410310d7df + split_attention = lambda *args, **kwargs: lambda x: x # return a no-op context manager + flush = lambda: None + import random + saved_rng_state = random.getstate() + random.seed(p.seed) # hyper_tile uses random, so we need to seed it with torch.no_grad(), p.sd_model.ema_scope(): with devices.autocast(): @@ -866,15 +876,25 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: shared.state.job = f"Batch {n+1} out of {p.n_iter}" with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast(): - samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts) + # get largest tile size available, which is 2^x which is factor of gcd of p.width and p.height + gcd = math.gcd(p.width, p.height) + largest_tile_size_available = 1 + while gcd % (largest_tile_size_available * 2) == 0: + largest_tile_size_available *= 2 + aspect_ratio = p.width / p.height + with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn): + with split_attention(unet_object, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn): + flush() + samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts) if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - - x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, 
check_for_nans=True) + with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn): + flush() + x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) @@ -980,6 +1000,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if opts.grid_save: images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(use_main_prompt=True), short_filename=not opts.grid_extended_filename, p=p, grid=True) + random.setstate(saved_rng_state) if not p.disable_extra_networks and p.extra_network_data: extra_networks.deactivate(p, p.extra_network_data) diff --git a/modules/shared_options.py b/modules/shared_options.py index d40db530..d9650265 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -200,6 +200,8 @@ options_templates.update(options_section(('optimizations', "Optimizations"), { "pad_cond_uncond": OptionInfo(False, "Pad prompt/negative prompt to be same length", infotext='Pad conds').info("improves performance when prompt and negative prompt have different lengths; changes seeds"), "persistent_cond_cache": OptionInfo(True, "Persistent cond cache").info("do not recalculate conds from prompts if prompts have not changed since previous calculation"), "batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond comandline argument"), + "hypertile_split_unet_attn" : OptionInfo(False, "Split attention in Unet with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but 
deterministic"), + "hypertile_split_vae_attn": OptionInfo(False, "Split attention in VAE with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"), })) options_templates.update(options_section(('compatibility', "Compatibility"), { -- cgit v1.2.3 From b29fc6d4de8812b25c520a46676cda13c3fe64ca Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Sat, 11 Nov 2023 23:43:13 +0900 Subject: Implement Hypertile Co-Authored-By: Kieran Hunt --- modules/hypertile.py | 333 ++++++++++++++++++++++++++++++++++++++++++++++++++ modules/processing.py | 65 ++++------ 2 files changed, 358 insertions(+), 40 deletions(-) create mode 100644 modules/hypertile.py (limited to 'modules/processing.py') diff --git a/modules/hypertile.py b/modules/hypertile.py new file mode 100644 index 00000000..ab1c74c0 --- /dev/null +++ b/modules/hypertile.py @@ -0,0 +1,333 @@ +""" +Hypertile module for splitting attention layers in SD-1.5 U-Net and SD-1.5 VAE +Warn : The patch works well only if the input image has a width and height that are multiples of 128 +Author : @tfernd Github : https://github.com/tfernd/HyperTile +""" + +from __future__ import annotations +from typing import Callable +from typing_extensions import Literal + +import logging +from functools import wraps, cache +from contextlib import contextmanager + +import math +import torch.nn as nn +import random + +from einops import rearrange + +# TODO add SD-XL layers +DEPTH_LAYERS = { + 0: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.0.attentions.0.transformer_blocks.0.attn1", + "down_blocks.0.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.0.transformer_blocks.0.attn1", + "up_blocks.3.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.1.1.transformer_blocks.0.attn1", + "input_blocks.2.1.transformer_blocks.0.attn1", + 
"output_blocks.9.1.transformer_blocks.0.attn1", + "output_blocks.10.1.transformer_blocks.0.attn1", + "output_blocks.11.1.transformer_blocks.0.attn1", + # SD 1.5 VAE + "decoder.mid_block.attentions.0", + ], + 1: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.1.attentions.0.transformer_blocks.0.attn1", + "down_blocks.1.attentions.1.transformer_blocks.0.attn1", + "up_blocks.2.attentions.0.transformer_blocks.0.attn1", + "up_blocks.2.attentions.1.transformer_blocks.0.attn1", + "up_blocks.2.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.4.1.transformer_blocks.0.attn1", + "input_blocks.5.1.transformer_blocks.0.attn1", + "output_blocks.6.1.transformer_blocks.0.attn1", + "output_blocks.7.1.transformer_blocks.0.attn1", + "output_blocks.8.1.transformer_blocks.0.attn1", + ], + 2: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.2.attentions.0.transformer_blocks.0.attn1", + "down_blocks.2.attentions.1.transformer_blocks.0.attn1", + "up_blocks.1.attentions.0.transformer_blocks.0.attn1", + "up_blocks.1.attentions.1.transformer_blocks.0.attn1", + "up_blocks.1.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.7.1.transformer_blocks.0.attn1", + "input_blocks.8.1.transformer_blocks.0.attn1", + "output_blocks.3.1.transformer_blocks.0.attn1", + "output_blocks.4.1.transformer_blocks.0.attn1", + "output_blocks.5.1.transformer_blocks.0.attn1", + ], + 3: [ + # SD 1.5 U-Net (diffusers) + "mid_block.attentions.0.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "middle_block.1.transformer_blocks.0.attn1", + ], +} +# XL layers, thanks for GitHub@gel-crabs for the help +DEPTH_LAYERS_XL = { + 0: [ + # SD 1.5 U-Net (diffusers) + "down_blocks.0.attentions.0.transformer_blocks.0.attn1", + "down_blocks.0.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.0.transformer_blocks.0.attn1", + "up_blocks.3.attentions.1.transformer_blocks.0.attn1", + "up_blocks.3.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net 
(ldm) + "input_blocks.4.1.transformer_blocks.0.attn1", + "input_blocks.5.1.transformer_blocks.0.attn1", + "output_blocks.3.1.transformer_blocks.0.attn1", + "output_blocks.4.1.transformer_blocks.0.attn1", + "output_blocks.5.1.transformer_blocks.0.attn1", + # SD 1.5 VAE + "decoder.mid_block.attentions.0", + "decoder.mid.attn_1", + ], + 1: [ + # SD 1.5 U-Net (diffusers) + #"down_blocks.1.attentions.0.transformer_blocks.0.attn1", + #"down_blocks.1.attentions.1.transformer_blocks.0.attn1", + #"up_blocks.2.attentions.0.transformer_blocks.0.attn1", + #"up_blocks.2.attentions.1.transformer_blocks.0.attn1", + #"up_blocks.2.attentions.2.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "input_blocks.4.1.transformer_blocks.1.attn1", + "input_blocks.5.1.transformer_blocks.1.attn1", + "output_blocks.3.1.transformer_blocks.1.attn1", + "output_blocks.4.1.transformer_blocks.1.attn1", + "output_blocks.5.1.transformer_blocks.1.attn1", + "input_blocks.7.1.transformer_blocks.0.attn1", + "input_blocks.8.1.transformer_blocks.0.attn1", + "output_blocks.0.1.transformer_blocks.0.attn1", + "output_blocks.1.1.transformer_blocks.0.attn1", + "output_blocks.2.1.transformer_blocks.0.attn1", + "input_blocks.7.1.transformer_blocks.1.attn1", + "input_blocks.8.1.transformer_blocks.1.attn1", + "output_blocks.0.1.transformer_blocks.1.attn1", + "output_blocks.1.1.transformer_blocks.1.attn1", + "output_blocks.2.1.transformer_blocks.1.attn1", + "input_blocks.7.1.transformer_blocks.2.attn1", + "input_blocks.8.1.transformer_blocks.2.attn1", + "output_blocks.0.1.transformer_blocks.2.attn1", + "output_blocks.1.1.transformer_blocks.2.attn1", + "output_blocks.2.1.transformer_blocks.2.attn1", + "input_blocks.7.1.transformer_blocks.3.attn1", + "input_blocks.8.1.transformer_blocks.3.attn1", + "output_blocks.0.1.transformer_blocks.3.attn1", + "output_blocks.1.1.transformer_blocks.3.attn1", + "output_blocks.2.1.transformer_blocks.3.attn1", + "input_blocks.7.1.transformer_blocks.4.attn1", + 
"input_blocks.8.1.transformer_blocks.4.attn1", + "output_blocks.0.1.transformer_blocks.4.attn1", + "output_blocks.1.1.transformer_blocks.4.attn1", + "output_blocks.2.1.transformer_blocks.4.attn1", + "input_blocks.7.1.transformer_blocks.5.attn1", + "input_blocks.8.1.transformer_blocks.5.attn1", + "output_blocks.0.1.transformer_blocks.5.attn1", + "output_blocks.1.1.transformer_blocks.5.attn1", + "output_blocks.2.1.transformer_blocks.5.attn1", + "input_blocks.7.1.transformer_blocks.6.attn1", + "input_blocks.8.1.transformer_blocks.6.attn1", + "output_blocks.0.1.transformer_blocks.6.attn1", + "output_blocks.1.1.transformer_blocks.6.attn1", + "output_blocks.2.1.transformer_blocks.6.attn1", + "input_blocks.7.1.transformer_blocks.7.attn1", + "input_blocks.8.1.transformer_blocks.7.attn1", + "output_blocks.0.1.transformer_blocks.7.attn1", + "output_blocks.1.1.transformer_blocks.7.attn1", + "output_blocks.2.1.transformer_blocks.7.attn1", + "input_blocks.7.1.transformer_blocks.8.attn1", + "input_blocks.8.1.transformer_blocks.8.attn1", + "output_blocks.0.1.transformer_blocks.8.attn1", + "output_blocks.1.1.transformer_blocks.8.attn1", + "output_blocks.2.1.transformer_blocks.8.attn1", + "input_blocks.7.1.transformer_blocks.9.attn1", + "input_blocks.8.1.transformer_blocks.9.attn1", + "output_blocks.0.1.transformer_blocks.9.attn1", + "output_blocks.1.1.transformer_blocks.9.attn1", + "output_blocks.2.1.transformer_blocks.9.attn1", + ], + 2: [ + # SD 1.5 U-Net (diffusers) + "mid_block.attentions.0.transformer_blocks.0.attn1", + # SD 1.5 U-Net (ldm) + "middle_block.1.transformer_blocks.0.attn1", + "middle_block.1.transformer_blocks.1.attn1", + "middle_block.1.transformer_blocks.2.attn1", + "middle_block.1.transformer_blocks.3.attn1", + "middle_block.1.transformer_blocks.4.attn1", + "middle_block.1.transformer_blocks.5.attn1", + "middle_block.1.transformer_blocks.6.attn1", + "middle_block.1.transformer_blocks.7.attn1", + "middle_block.1.transformer_blocks.8.attn1", + 
"middle_block.1.transformer_blocks.9.attn1", + ], +} + + +RNG_INSTANCE = random.Random() + +def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: + """ + Returns a random divisor of value that + x * min_value <= value + if max_options is 1, the behavior is deterministic + """ + min_value = min(min_value, value) + + # All big divisors of value (inclusive) + divisors = [i for i in range(min_value, value + 1) if value % i == 0] # divisors in small -> big order + + ns = [value // i for i in divisors[:max_options]] # has at least 1 element # big -> small order + + idx = RNG_INSTANCE.randint(0, len(ns) - 1) + + return ns[idx] + +def set_hypertile_seed(seed: int) -> None: + RNG_INSTANCE.seed(seed) + +def largest_tile_size_available(width:int, height:int) -> int: + """ + Calculates the largest tile size available for a given width and height + Tile size is always a power of 2 + """ + gcd = math.gcd(width, height) + largest_tile_size_available = 1 + while gcd % (largest_tile_size_available * 2) == 0: + largest_tile_size_available *= 2 + return largest_tile_size_available + +def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]: + """ + Finds h and w such that h*w = hw and h/w = aspect_ratio + We check all possible divisors of hw and return the closest to the aspect ratio + """ + divisors = [i for i in range(2, hw + 1) if hw % i == 0] # all divisors of hw + pairs = [(i, hw // i) for i in divisors] # all pairs of divisors of hw + ratios = [w/h for h, w in pairs] # all ratios of pairs of divisors of hw + closest_ratio = min(ratios, key=lambda x: abs(x - aspect_ratio)) # closest ratio to aspect_ratio + closest_pair = pairs[ratios.index(closest_ratio)] # closest pair of divisors to aspect_ratio + return closest_pair + +@cache +def find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]: + """ + Finds h and w such that h*w = hw and h/w = aspect_ratio + """ + h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw 
/ aspect_ratio)) + # find h and w such that h*w = hw and h/w = aspect_ratio + if h * w != hw: + w_candidate = hw / h + # check if w is an integer + if not w_candidate.is_integer(): + h_candidate = hw / w + # check if h is an integer + if not h_candidate.is_integer(): + return iterative_closest_divisors(hw, aspect_ratio) + else: + h = int(h_candidate) + else: + w = int(w_candidate) + return h, w + +@contextmanager +def split_attention( + layer: nn.Module, + /, + aspect_ratio: float, # width/height + tile_size: int = 128, # 128 for VAE + swap_size: int = 1, # 1 for VAE + *, + disable: bool = False, + max_depth: Literal[0, 1, 2, 3] = 0, # ! Try 0 or 1 + scale_depth: bool = True, # scale the tile-size depending on the depth + is_sdxl: bool = False, # is the model SD-XL +): + # Hijacks AttnBlock from ldm and Attention from diffusers + + if disable: + logging.info(f"Attention for {layer.__class__.__qualname__} not splitted") + yield + return + + latent_tile_size = max(128, tile_size) // 8 + + def self_attn_forward(forward: Callable, depth: int, layer_name: str, module: nn.Module) -> Callable: + @wraps(forward) + def wrapper(*args, **kwargs): + x = args[0] + + # VAE + if x.ndim == 4: + b, c, h, w = x.shape + + nh = random_divisor(h, latent_tile_size, swap_size) + nw = random_divisor(w, latent_tile_size, swap_size) + + if nh * nw > 1: + x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw) # split into nh * nw tiles + + out = forward(x, *args[1:], **kwargs) + + if nh * nw > 1: + out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw) + + # U-Net + else: + hw: int = x.size(1) + h, w = find_hw_candidates(hw, aspect_ratio) + assert h * w == hw, f"Invalid aspect ratio {aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}" + + factor = 2**depth if scale_depth else 1 + nh = random_divisor(h, latent_tile_size * factor, swap_size) + nw = random_divisor(w, latent_tile_size * factor, swap_size) + + 
module._split_sizes_hypertile.append((nh, nw)) # type: ignore + + if nh * nw > 1: + x = rearrange(x, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) + + out = forward(x, *args[1:], **kwargs) + + if nh * nw > 1: + out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) + out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + + return out + + return wrapper + + # Handle hijacking the forward method and recovering afterwards + try: + if is_sdxl: + layers = DEPTH_LAYERS_XL + else: + layers = DEPTH_LAYERS + for depth in range(max_depth + 1): + for layer_name, module in layer.named_modules(): + if any(layer_name.endswith(try_name) for try_name in layers[depth]): + # print input shape for debugging + logging.debug(f"HyperTile hijacking attention layer at depth {depth}: {layer_name}") + # hijack + module._original_forward_hypertile = module.forward + module.forward = self_attn_forward(module.forward, depth, layer_name, module) + module._split_sizes_hypertile = [] + yield + finally: + for layer_name, module in layer.named_modules(): + # remove hijack + if hasattr(module, "_original_forward_hypertile"): + if module._split_sizes_hypertile: + logging.debug(f"layer {layer_name} splitted with ({module._split_sizes_hypertile})") + # recover + module.forward = module._original_forward_hypertile + del module._original_forward_hypertile + del module._split_sizes_hypertile diff --git a/modules/processing.py b/modules/processing.py index e2309534..e19a09a3 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -24,6 +24,7 @@ from modules.shared import opts, cmd_opts, state import modules.shared as shared import modules.paths as paths import modules.face_restoration +from modules.hypertile import split_attention, set_hypertile_seed, largest_tile_size_available import modules.images as images import modules.styles import modules.sd_models as sd_models @@ -799,17 +800,6 @@ def process_images_inner(p: 
StableDiffusionProcessing) -> Processed: infotexts = [] output_images = [] - unet_object = p.sd_model.model - vae_model = p.sd_model.first_stage_model - try: - from hyper_tile import split_attention, flush - except (ImportError, ModuleNotFoundError): # pip install git+https://github.com/tfernd/HyperTile@2ef64b2800d007d305755c33550537410310d7df - split_attention = lambda *args, **kwargs: lambda x: x # return a no-op context manager - flush = lambda: None - import random - saved_rng_state = random.getstate() - random.seed(p.seed) # hyper_tile uses random, so we need to seed it - with torch.no_grad(), p.sd_model.ema_scope(): with devices.autocast(): p.init(p.all_prompts, p.all_seeds, p.all_subseeds) @@ -871,29 +861,20 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: p.comment(comment) p.extra_generation_params.update(model_hijack.extra_generation_params) - + set_hypertile_seed(p.seed) + # add batch size + hypertile status to information to reproduce the run if p.n_iter > 1: shared.state.job = f"Batch {n+1} out of {p.n_iter}" with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast(): - # get largest tile size available, which is 2^x which is factor of gcd of p.width and p.height - gcd = math.gcd(p.width, p.height) - largest_tile_size_available = 1 - while gcd % (largest_tile_size_available * 2) == 0: - largest_tile_size_available *= 2 - aspect_ratio = p.width / p.height - with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn): - with split_attention(unet_object, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn): - flush() - samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts) + samples_ddim = p.sample(conditioning=p.c, 
unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts) if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - with split_attention(vae_model, aspect_ratio=aspect_ratio, tile_size=min(largest_tile_size_available, 128), disable=not shared.opts.hypertile_split_vae_attn): - flush() + with split_attention(p.sd_model.first_stage_model, aspect_ratio = p.width / p.height, tile_size=min(largest_tile_size_available(p.width, p.height), 128), disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() @@ -1000,7 +981,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if opts.grid_save: images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(use_main_prompt=True), short_filename=not opts.grid_extended_filename, p=p, grid=True) - random.setstate(saved_rng_state) if not p.disable_extra_networks and p.extra_network_data: extra_networks.deactivate(p, p.extra_network_data) @@ -1161,24 +1141,25 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model) - + aspect_ratio = self.width / self.height x = self.rng.next() - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) + tile_size = largest_tile_size_available(self.width, self.height) + with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 
128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl): + devices.torch_gc() + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) del x - if not self.enable_hr: return samples if self.latent_scale_mode is None: - decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32) + with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32) else: decoded_samples = None with sd_models.SkipWritingToConfig(): sd_models.reload_model_weights(info=self.hr_checkpoint_info) - - devices.torch_gc() - return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts) def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts): @@ -1186,7 +1167,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): return samples self.is_hr_pass = True - target_width = self.hr_upscale_to_x target_height = self.hr_upscale_to_y @@ -1264,18 +1244,19 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): if self.scripts is not None: self.scripts.before_hr(self) - - samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) + tile_size = largest_tile_size_available(target_width, target_height) + with 
split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + with split_attention(self.sd_model.model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=3, max_depth=1,scale_depth=True, disable=not opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl): + samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio()) self.sampler = None devices.torch_gc() - - decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) + with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) self.is_hr_pass = False - return decoded_samples def close(self): @@ -1550,8 +1531,12 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.initial_noise_multiplier != 1.0: self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier x *= self.initial_noise_multiplier - - samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) + aspect_ratio = self.width / self.height + tile_size = largest_tile_size_available(self.width, self.height) + with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + with 
split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl): + devices.torch_gc() + samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) if self.mask is not None: samples = samples * self.nmask + self.init_latent * self.mask -- cgit v1.2.3 From bcfaf3979a9f93e37c418b58c75b02d9570b4354 Mon Sep 17 00:00:00 2001 From: AngelBottomless Date: Thu, 16 Nov 2023 18:43:16 +0900 Subject: convert/add hypertile options --- modules/hypertile.py | 36 ++++++++++++++++++++++++++++++++++++ modules/processing.py | 21 +++++++++++---------- modules/shared_options.py | 6 ++++++ 3 files changed, 53 insertions(+), 10 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/hypertile.py b/modules/hypertile.py index 32d8604c..fee24a8c 100644 --- a/modules/hypertile.py +++ b/modules/hypertile.py @@ -332,3 +332,39 @@ def split_attention( module.forward = module._original_forward_hypertile del module._original_forward_hypertile del module._split_sizes_hypertile + +def hypertile_context_vae(model:nn.Module, aspect_ratio:float, tile_size:int, opts): + """ + Returns context manager for VAE + """ + enabled = not opts.hypertile_split_vae_attn + swap_size = opts.hypertile_swap_size_vae + max_depth = opts.hypertile_max_depth_vae + tile_size_max = opts.hypertile_max_tile_vae + return split_attention( + model, + aspect_ratio=aspect_ratio, + tile_size=min(tile_size, tile_size_max), + swap_size=swap_size, + disable=not enabled, + max_depth=max_depth, + is_sdxl=False, + ) + +def hypertile_context_unet(model:nn.Module, aspect_ratio:float, tile_size:int, opts, is_sdxl:bool): + """ + Returns context manager for U-Net + """ + enabled = not opts.hypertile_split_unet_attn + swap_size = opts.hypertile_swap_size_unet + max_depth = opts.hypertile_max_depth_unet + 
tile_size_max = opts.hypertile_max_tile_unet + return split_attention( + model, + aspect_ratio=aspect_ratio, + tile_size=min(tile_size, tile_size_max), + swap_size=swap_size, + disable=not enabled, + max_depth=max_depth, + is_sdxl=is_sdxl, + ) \ No newline at end of file diff --git a/modules/processing.py b/modules/processing.py index e19a09a3..c622ff33 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -24,7 +24,7 @@ from modules.shared import opts, cmd_opts, state import modules.shared as shared import modules.paths as paths import modules.face_restoration -from modules.hypertile import split_attention, set_hypertile_seed, largest_tile_size_available +from modules.hypertile import set_hypertile_seed, largest_tile_size_available, hypertile_context_unet, hypertile_context_vae import modules.images as images import modules.styles import modules.sd_models as sd_models @@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - with split_attention(p.sd_model.first_stage_model, aspect_ratio = p.width / p.height, tile_size=min(largest_tile_size_available(p.width, p.height), 128), disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + with hypertile_context_unet(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() @@ -1144,8 +1144,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): aspect_ratio = self.width / self.height x = self.rng.next() tile_size = largest_tile_size_available(self.width, self.height) - with split_attention(self.sd_model.first_stage_model, 
aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): - with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl): + with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): + with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): devices.torch_gc() samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) del x @@ -1153,7 +1153,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): return samples if self.latent_scale_mode is None: - with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32) else: decoded_samples = None @@ -1245,15 +1245,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): if self.scripts is not None: self.scripts.before_hr(self) tile_size = largest_tile_size_available(target_width, target_height) - with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): - with split_attention(self.sd_model.model, aspect_ratio=target_width / target_height, 
tile_size=min(tile_size, 256), swap_size=3, max_depth=1,scale_depth=True, disable=not opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl): + aspect_ratio = self.width / self.height + with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): + with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio()) self.sampler = None devices.torch_gc() - with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): + with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) self.is_hr_pass = False @@ -1533,8 +1534,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): x *= self.initial_noise_multiplier aspect_ratio = self.width / self.height tile_size = largest_tile_size_available(self.width, self.height) - with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl): - with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl): + with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, 
tile_size=tile_size, opts=shared.opts): + with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): devices.torch_gc() samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) diff --git a/modules/shared_options.py b/modules/shared_options.py index d9650265..28a48906 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -202,6 +202,12 @@ options_templates.update(options_section(('optimizations', "Optimizations"), { "batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond comandline argument"), "hypertile_split_unet_attn" : OptionInfo(False, "Split attention in Unet with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"), "hypertile_split_vae_attn": OptionInfo(False, "Split attention in VAE with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"), + "hypertile_max_depth_vae" : OptionInfo(3, "Max depth for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), + "hypertile_max_depth_unet" : OptionInfo(3, "Max depth for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), + "hypertile_max_tile_vae" : OptionInfo(128, "Max tile size for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).link("Github", "https://github.com/tfernd/HyperTile"), + "hypertile_max_tile_unet" : OptionInfo(256, "Max tile size for Unet HyperTile hijack", 
gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).link("Github", "https://github.com/tfernd/HyperTile"), + "hypertile_swap_size_unet": OptionInfo(3, "Swap size for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), + "hypertile_swap_size_vae": OptionInfo(3, "Swap size for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), })) options_templates.update(options_section(('compatibility', "Compatibility"), { -- cgit v1.2.3 From c40be2252ab1c8c289562db208c5ac6618bd8545 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 17 Nov 2023 09:22:27 +0900 Subject: Fix critical issue - unet apply --- modules/processing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index c622ff33..2fda7f33 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - with hypertile_context_unet(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): + with hypertile_context_unet(p.sd_model.model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() @@ -1145,7 +1145,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): x = self.rng.next() tile_size = largest_tile_size_available(self.width, 
self.height) with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): + with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): devices.torch_gc() samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) del x @@ -1247,7 +1247,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): tile_size = largest_tile_size_available(target_width, target_height) aspect_ratio = self.width / self.height with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): + with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio()) @@ -1535,7 +1535,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): aspect_ratio = self.width / self.height tile_size = largest_tile_size_available(self.width, self.height) with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - with hypertile_context_unet(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): + with 
hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): devices.torch_gc() samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) -- cgit v1.2.3 From 97431f29feb17ffc96ca95e9b3efec87be9d8b3a Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 17 Nov 2023 10:05:28 +0900 Subject: fix double gc and decoding with unet context --- modules/processing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 2fda7f33..36c2be5e 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - with hypertile_context_unet(p.sd_model.model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): + with hypertile_context_vae(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), opts=shared.opts): x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() @@ -1146,11 +1146,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): tile_size = largest_tile_size_available(self.width, self.height) with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): - devices.torch_gc() 
samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) del x if not self.enable_hr: return samples + devices.torch_gc() if self.latent_scale_mode is None: with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): @@ -1536,7 +1536,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): tile_size = largest_tile_size_available(self.width, self.height) with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): - devices.torch_gc() samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) if self.mask is not None: -- cgit v1.2.3 From d2e0c1ca132f4f0d98b77397a9f353d4ad8e7c4b Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 26 Nov 2023 10:51:45 +0300 Subject: rework hypertile into a built-in extension --- README.md | 1 + extensions-builtin/hypertile/hypertile.py | 221 +++++++++------------ .../hypertile/scripts/hypertile_script.py | 73 +++++++ modules/processing.py | 37 ++-- modules/shared_options.py | 8 - 5 files changed, 186 insertions(+), 154 deletions(-) create mode 100644 extensions-builtin/hypertile/scripts/hypertile_script.py (limited to 'modules/processing.py') diff --git a/README.md b/README.md index 25ba070e..3b3f93ad 100644 --- a/README.md +++ b/README.md @@ -174,5 +174,6 @@ Licenses for borrowed code can be found in `Settings -> Licenses` screen, and al - TAESD - Ollin Boer Bohan - https://github.com/madebyollin/taesd - LyCORIS - KohakuBlueleaf - Restart sampling - lambertae - https://github.com/Newbeeer/diffusion_restart_sampling +- Hypertile - tfernd - 
https://github.com/tfernd/HyperTile - Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user. - (You) diff --git a/extensions-builtin/hypertile/hypertile.py b/extensions-builtin/hypertile/hypertile.py index be898fce..a40c1311 100644 --- a/extensions-builtin/hypertile/hypertile.py +++ b/extensions-builtin/hypertile/hypertile.py @@ -1,10 +1,13 @@ """ Hypertile module for splitting attention layers in SD-1.5 U-Net and SD-1.5 VAE -Warn : The patch works well only if the input image has a width and height that are multiples of 128 -Author : @tfernd Github : https://github.com/tfernd/HyperTile +Warn: The patch works well only if the input image has a width and height that are multiples of 128 +Original author: @tfernd Github: https://github.com/tfernd/HyperTile """ from __future__ import annotations + +import functools +from dataclasses import dataclass from typing import Callable from typing_extensions import Literal @@ -18,6 +21,19 @@ import random from einops import rearrange + +@dataclass +class HypertileParams: + depth = 0 + layer_name = "" + tile_size: int = 0 + swap_size: int = 0 + aspect_ratio: float = 1.0 + forward = None + enabled = False + + + # TODO add SD-XL layers DEPTH_LAYERS = { 0: [ @@ -176,6 +192,7 @@ DEPTH_LAYERS_XL = { RNG_INSTANCE = random.Random() + def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: """ Returns a random divisor of value that @@ -193,10 +210,13 @@ def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: return ns[idx] + def set_hypertile_seed(seed: int) -> None: RNG_INSTANCE.seed(seed) -def largest_tile_size_available(width:int, height:int) -> int: + +@functools.cache +def largest_tile_size_available(width: int, height: int) -> int: """ Calculates the largest tile size available for a given width and height Tile size is always a power of 2 @@ -207,6 +227,7 @@ def largest_tile_size_available(width:int, height:int) -> int: largest_tile_size_available *= 
2 return largest_tile_size_available + def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]: """ Finds h and w such that h*w = hw and h/w = aspect_ratio @@ -219,6 +240,7 @@ def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]: closest_pair = pairs[ratios.index(closest_ratio)] # closest pair of divisors to aspect_ratio return closest_pair + @cache def find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]: """ @@ -240,132 +262,87 @@ def find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]: w = int(w_candidate) return h, w -@contextmanager -def split_attention( - layer: nn.Module, - /, - aspect_ratio: float, # width/height - tile_size: int = 128, # 128 for VAE - swap_size: int = 1, # 1 for VAE - *, - disable: bool = False, - max_depth: Literal[0, 1, 2, 3] = 0, # ! Try 0 or 1 - scale_depth: bool = True, # scale the tile-size depending on the depth - is_sdxl: bool = False, # is the model SD-XL -): - # Hijacks AttnBlock from ldm and Attention from diffusers - - if disable: - logging.info(f"Attention for {layer.__class__.__qualname__} not splitted") - yield - return - - latent_tile_size = max(128, tile_size) // 8 - - def self_attn_forward(forward: Callable, depth: int, layer_name: str, module: nn.Module) -> Callable: - @wraps(forward) - def wrapper(*args, **kwargs): - x = args[0] - - # VAE - if x.ndim == 4: - b, c, h, w = x.shape - - nh = random_divisor(h, latent_tile_size, swap_size) - nw = random_divisor(w, latent_tile_size, swap_size) - - if nh * nw > 1: - x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw) # split into nh * nw tiles - - out = forward(x, *args[1:], **kwargs) - - if nh * nw > 1: - out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw) - - # U-Net - else: - hw: int = x.size(1) - h, w = find_hw_candidates(hw, aspect_ratio) - assert h * w == hw, f"Invalid aspect ratio {aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}" - factor 
= 2**depth if scale_depth else 1 - nh = random_divisor(h, latent_tile_size * factor, swap_size) - nw = random_divisor(w, latent_tile_size * factor, swap_size) +def self_attn_forward(params: HypertileParams, scale_depth=True) -> Callable: + + @wraps(params.forward) + def wrapper(*args, **kwargs): + if not params.enabled: + return params.forward(*args, **kwargs) - module._split_sizes_hypertile.append((nh, nw)) # type: ignore + latent_tile_size = max(128, params.tile_size) // 8 + x = args[0] - if nh * nw > 1: - x = rearrange(x, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) + # VAE + if x.ndim == 4: + b, c, h, w = x.shape - out = forward(x, *args[1:], **kwargs) + nh = random_divisor(h, latent_tile_size, params.swap_size) + nw = random_divisor(w, latent_tile_size, params.swap_size) - if nh * nw > 1: - out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) - out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + if nh * nw > 1: + x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw) # split into nh * nw tiles - return out + out = params.forward(x, *args[1:], **kwargs) - return wrapper + if nh * nw > 1: + out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw) - # Handle hijacking the forward method and recovering afterwards - try: - if is_sdxl: - layers = DEPTH_LAYERS_XL + # U-Net else: - layers = DEPTH_LAYERS - for depth in range(max_depth + 1): - for layer_name, module in layer.named_modules(): + hw: int = x.size(1) + h, w = find_hw_candidates(hw, params.aspect_ratio) + assert h * w == hw, f"Invalid aspect ratio {params.aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}" + + factor = 2 ** params.depth if scale_depth else 1 + nh = random_divisor(h, latent_tile_size * factor, params.swap_size) + nw = random_divisor(w, latent_tile_size * factor, params.swap_size) + + if nh * nw > 1: + x = rearrange(x, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, 
w=w // nw, nh=nh, nw=nw) + + out = params.forward(x, *args[1:], **kwargs) + + if nh * nw > 1: + out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) + out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + + return out + + return wrapper + + +def hypertile_hook_model(model: nn.Module, width, height, *, enable=False, tile_size_max=128, swap_size=1, max_depth=3, is_sdxl=False): + hypertile_layers = getattr(model, "__webui_hypertile_layers", None) + if hypertile_layers is None: + if not enable: + return + + hypertile_layers = {} + layers = DEPTH_LAYERS_XL if is_sdxl else DEPTH_LAYERS + + for depth in range(4): + for layer_name, module in model.named_modules(): if any(layer_name.endswith(try_name) for try_name in layers[depth]): - # print input shape for debugging - logging.debug(f"HyperTile hijacking attention layer at depth {depth}: {layer_name}") - # hijack - module._original_forward_hypertile = module.forward - module.forward = self_attn_forward(module.forward, depth, layer_name, module) - module._split_sizes_hypertile = [] - yield - finally: - for layer_name, module in layer.named_modules(): - # remove hijack - if hasattr(module, "_original_forward_hypertile"): - if module._split_sizes_hypertile: - logging.debug(f"layer {layer_name} splitted with ({module._split_sizes_hypertile})") - # recover - module.forward = module._original_forward_hypertile - del module._original_forward_hypertile - del module._split_sizes_hypertile - -def hypertile_context_vae(model:nn.Module, aspect_ratio:float, tile_size:int, opts): - """ - Returns context manager for VAE - """ - enabled = opts.hypertile_split_vae_attn - swap_size = opts.hypertile_swap_size_vae - max_depth = opts.hypertile_max_depth_vae - tile_size_max = opts.hypertile_max_tile_vae - return split_attention( - model, - aspect_ratio=aspect_ratio, - tile_size=min(tile_size, tile_size_max), - swap_size=swap_size, - disable=not enabled, - max_depth=max_depth, - is_sdxl=False, - ) - 
-def hypertile_context_unet(model:nn.Module, aspect_ratio:float, tile_size:int, opts, is_sdxl:bool): - """ - Returns context manager for U-Net - """ - enabled = opts.hypertile_split_unet_attn - swap_size = opts.hypertile_swap_size_unet - max_depth = opts.hypertile_max_depth_unet - tile_size_max = opts.hypertile_max_tile_unet - return split_attention( - model, - aspect_ratio=aspect_ratio, - tile_size=min(tile_size, tile_size_max), - swap_size=swap_size, - disable=not enabled, - max_depth=max_depth, - is_sdxl=is_sdxl, - ) + params = HypertileParams() + module.__webui_hypertile_params = params + params.forward = module.forward + params.depth = depth + params.layer_name = layer_name + module.forward = self_attn_forward(params) + + hypertile_layers[layer_name] = 1 + + model.__webui_hypertile_layers = hypertile_layers + + aspect_ratio = width / height + tile_size = min(largest_tile_size_available(width, height), tile_size_max) + + for layer_name, module in model.named_modules(): + if layer_name in hypertile_layers: + params = module.__webui_hypertile_params + + params.tile_size = tile_size + params.swap_size = swap_size + params.aspect_ratio = aspect_ratio + params.enabled = enable and params.depth <= max_depth diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py new file mode 100644 index 00000000..3cc29cd1 --- /dev/null +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -0,0 +1,73 @@ +import hypertile +from modules import scripts, script_callbacks, shared + + +class ScriptHypertile(scripts.Script): + name = "Hypertile" + + def title(self): + return self.name + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def process(self, p, *args): + hypertile.set_hypertile_seed(p.all_seeds[0]) + + configure_hypertile(p.width, p.height, enable_unet=shared.opts.hypertile_enable_unet) + + def before_hr(self, p, *args): + configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, 
enable_unet=shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet) + + +def configure_hypertile(width, height, enable_unet=True): + hypertile.hypertile_hook_model( + shared.sd_model.first_stage_model, + width, + height, + swap_size=shared.opts.hypertile_swap_size_vae, + max_depth=shared.opts.hypertile_max_depth_vae, + tile_size_max=shared.opts.hypertile_max_tile_vae, + enable=shared.opts.hypertile_enable_vae, + ) + + hypertile.hypertile_hook_model( + shared.sd_model.model, + width, + height, + swap_size=shared.opts.hypertile_swap_size_unet, + max_depth=shared.opts.hypertile_max_depth_unet, + tile_size_max=shared.opts.hypertile_max_tile_unet, + enable=enable_unet, + is_sdxl=shared.sd_model.is_sdxl + ) + + +def on_ui_settings(): + import gradio as gr + + options = { + "hypertile_explanation": shared.OptionHTML(""" + Hypertile optimizes the self-attention layer within U-Net and VAE models, + resulting in a reduction in computation time ranging from 1 to 4 times. The larger the generated image is, the greater the + benefit. 
+ """), + + "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net").info("noticeable change in details of the generated picture; if enabled, overrides the setting below"), + "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass"), + "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), + "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), + "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}), + + "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE").info("minimal change in the generated picture"), + "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}), + "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), + "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}), + } + + for name, opt in options.items(): + opt.section = ('hypertile', "Hypertile") + shared.opts.add_option(name, opt) + + +script_callbacks.on_ui_settings(on_ui_settings) diff --git a/modules/processing.py b/modules/processing.py index 36c2be5e..ac58ef86 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -24,7 +24,6 @@ from modules.shared import opts, cmd_opts, state import modules.shared as shared import modules.paths as paths import modules.face_restoration -from modules.hypertile import set_hypertile_seed, largest_tile_size_available, hypertile_context_unet, hypertile_context_vae import modules.images as images import modules.styles import modules.sd_models as sd_models @@ -861,8 +860,7 @@ def process_images_inner(p: 
StableDiffusionProcessing) -> Processed: p.comment(comment) p.extra_generation_params.update(model_hijack.extra_generation_params) - set_hypertile_seed(p.seed) - # add batch size + hypertile status to information to reproduce the run + if p.n_iter > 1: shared.state.job = f"Batch {n+1} out of {p.n_iter}" @@ -874,8 +872,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - with hypertile_context_vae(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), opts=shared.opts): - x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) + x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) @@ -1141,25 +1138,23 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model) - aspect_ratio = self.width / self.height + x = self.rng.next() - tile_size = largest_tile_size_available(self.width, self.height) - with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) del x + 
if not self.enable_hr: return samples devices.torch_gc() if self.latent_scale_mode is None: - with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32) + decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32) else: decoded_samples = None with sd_models.SkipWritingToConfig(): sd_models.reload_model_weights(info=self.hr_checkpoint_info) + return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts) def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts): @@ -1244,18 +1239,15 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): if self.scripts is not None: self.scripts.before_hr(self) - tile_size = largest_tile_size_available(target_width, target_height) - aspect_ratio = self.width / self.height - with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): - samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) + + samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio()) self.sampler = None devices.torch_gc() - with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - decoded_samples = 
decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) + + decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) self.is_hr_pass = False return decoded_samples @@ -1532,11 +1524,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.initial_noise_multiplier != 1.0: self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier x *= self.initial_noise_multiplier - aspect_ratio = self.width / self.height - tile_size = largest_tile_size_available(self.width, self.height) - with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts): - with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts): - samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) + + samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) if self.mask is not None: samples = samples * self.nmask + self.init_latent * self.mask diff --git a/modules/shared_options.py b/modules/shared_options.py index 28a48906..d40db530 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -200,14 +200,6 @@ options_templates.update(options_section(('optimizations', "Optimizations"), { "pad_cond_uncond": OptionInfo(False, "Pad prompt/negative prompt to be same length", infotext='Pad conds').info("improves performance when prompt and negative prompt have different lengths; changes seeds"), "persistent_cond_cache": OptionInfo(True, "Persistent cond cache").info("do not recalculate conds from prompts if prompts have not changed since previous calculation"), "batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both 
conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond comandline argument"), - "hypertile_split_unet_attn" : OptionInfo(False, "Split attention in Unet with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"), - "hypertile_split_vae_attn": OptionInfo(False, "Split attention in VAE with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"), - "hypertile_max_depth_vae" : OptionInfo(3, "Max depth for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), - "hypertile_max_depth_unet" : OptionInfo(3, "Max depth for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), - "hypertile_max_tile_vae" : OptionInfo(128, "Max tile size for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).link("Github", "https://github.com/tfernd/HyperTile"), - "hypertile_max_tile_unet" : OptionInfo(256, "Max tile size for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).link("Github", "https://github.com/tfernd/HyperTile"), - "hypertile_swap_size_unet": OptionInfo(3, "Swap size for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), - "hypertile_swap_size_vae": OptionInfo(3, "Swap size for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"), })) options_templates.update(options_section(('compatibility', "Compatibility"), { -- cgit v1.2.3 From dec791d35ddcd02ca33563d3d0355e05e45de8ad Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 15:05:01 -0700 
Subject: Removed code which forces the inpainting mask to be 0 or 1. Now fractional values (e.g. 0.5) are accepted. --- modules/processing.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index e124e7f0..317458f5 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -83,7 +83,7 @@ def apply_overlay(image, paste_loc, index, overlays): def create_binary_mask(image): if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255): - image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0) + image = image.split()[-1].convert("L") else: image = image.convert('L') return image @@ -319,9 +319,6 @@ class StableDiffusionProcessing: conditioning_mask = np.array(image_mask.convert("L")) conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) - - # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 - conditioning_mask = torch.round(conditioning_mask) else: conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:]) @@ -1504,7 +1501,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 latmask = latmask[0] - latmask = np.around(latmask) latmask = np.tile(latmask[None], (4, 1, 1)) self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype) -- cgit v1.2.3 From bbba133f054706c3668b7d03b0e6d0afc15705db Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 15:09:43 -0700 Subject: Removed conflicting step that replaces the softly inpainted latents with a naive blend with the original latents. 
--- modules/processing.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 317458f5..ae894f1a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1523,9 +1523,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) - if self.mask is not None: - samples = samples * self.nmask + self.init_latent * self.mask - del x devices.torch_gc() -- cgit v1.2.3 From a6e584645305c0a91a3d46f73546e191b249210f Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 16:13:42 -0700 Subject: Nerfs the aggressive post-processing step of overlaying the original image. --- modules/processing.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index ae894f1a..12e08e87 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1412,7 +1412,12 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image_mask = Image.fromarray(np_mask) if self.inpaint_full_res: - self.mask_for_overlay = image_mask + np_mask = np.array(image_mask).astype(np.float32) + np_mask /= 255 + np_mask = 1-pow(1-np_mask, 100) + np_mask *= 255 + np_mask = np.clip(np_mask, 0, 255).astype(np.uint8) + self.mask_for_overlay = Image.fromarray(np_mask) mask = image_mask.convert('L') crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding) crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) @@ -1423,8 +1428,11 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.paste_to = (x1, y1, x2-x1, y2-y1) else: image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height) - np_mask = 
np.array(image_mask) - np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8) + np_mask = np.array(image_mask).astype(np.float32) + np_mask /= 255 + np_mask = 1-pow(1-np_mask, 100) + np_mask *= 255 + np_mask = np.clip(np_mask, 0, 255).astype(np.uint8) self.mask_for_overlay = Image.fromarray(np_mask) self.overlay_images = [] -- cgit v1.2.3 From debf836fcc8d9becc3da8b1a29e33f40b0d9ef3e Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 16:15:36 -0700 Subject: Added UI elements to control blending parameters. --- modules/img2img.py | 48 +++++++++++++++++++++++++++++++++++++++- modules/processing.py | 3 +++ modules/sd_samplers_common.py | 3 +++ modules/ui.py | 9 ++++++++ scripts/outpainting_mk_2.py | 10 +++++++-- scripts/poor_mans_outpainting.py | 11 +++++++-- test/test_img2img.py | 3 +++ 7 files changed, 82 insertions(+), 5 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/img2img.py b/modules/img2img.py index 1519e132..240d0588 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -116,7 +116,47 @@ def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=Fal process_images(p) -def img2img(id_task: str, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, steps: int, sampler_name: str, mask_blur: int, mask_alpha: float, inpainting_fill: int, n_iter: int, batch_size: int, cfg_scale: float, image_cfg_scale: float, denoising_strength: float, selected_scale_tab: int, height: int, width: int, scale_by: float, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, img2img_batch_use_png_info: bool, img2img_batch_png_info_props: list, img2img_batch_png_info_dir: str, request: gr.Request, *args): 
+def img2img(id_task: str, + mode: int, + prompt: str, + negative_prompt: str, + prompt_styles, + init_img, + sketch, + init_img_with_mask, + inpaint_color_sketch, + inpaint_color_sketch_orig, + init_img_inpaint, + init_mask_inpaint, + steps: int, + sampler_name: str, + mask_blur: int, + mask_alpha: float, + mask_blend_power: float, + mask_blend_scale: float, + mask_blend_offset: float, + inpainting_fill: int, + n_iter: int, + batch_size: int, + cfg_scale: float, + image_cfg_scale: float, + denoising_strength: float, + selected_scale_tab: int, + height: int, + width: int, + scale_by: float, + resize_mode: int, + inpaint_full_res: bool, + inpaint_full_res_padding: int, + inpainting_mask_invert: int, + img2img_batch_input_dir: str, + img2img_batch_output_dir: str, + img2img_batch_inpaint_mask_dir: str, + override_settings_texts, + img2img_batch_use_png_info: bool, + img2img_batch_png_info_props: list, + img2img_batch_png_info_dir: str, + request: gr.Request, *args): override_settings = create_override_settings_dict(override_settings_texts) is_batch = mode == 5 @@ -174,6 +214,9 @@ def img2img(id_task: str, mode: int, prompt: str, negative_prompt: str, prompt_s init_images=[image], mask=mask, mask_blur=mask_blur, + mask_blend_power=mask_blend_power, + mask_blend_scale=mask_blend_scale, + mask_blend_offset=mask_blend_offset, inpainting_fill=inpainting_fill, resize_mode=resize_mode, denoising_strength=denoising_strength, @@ -194,6 +237,9 @@ def img2img(id_task: str, mode: int, prompt: str, negative_prompt: str, prompt_s if mask: p.extra_generation_params["Mask blur"] = mask_blur + p.extra_generation_params["Mask blend power"] = mask_blend_power + p.extra_generation_params["Mask blend scale"] = mask_blend_scale + p.extra_generation_params["Mask blend offset"] = mask_blend_offset with closing(p): if is_batch: diff --git a/modules/processing.py b/modules/processing.py index 12e08e87..da4d6fda 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1349,6 +1349,9 
@@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask_blur_x: int = 4 mask_blur_y: int = 4 mask_blur: int = None + mask_blend_power: float = 1 + mask_blend_scale: float = 1 + mask_blend_offset: float = 0 inpainting_fill: int = 0 inpaint_full_res: bool = True inpaint_full_res_padding: int = 0 diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 58efcad2..8904da2f 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -277,6 +277,9 @@ class Sampler: self.model_wrap_cfg.p = p self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None + self.model_wrap_cfg.mask_blend_power = p.mask_blend_power if hasattr(p, 'mask_blend_power') else None + self.model_wrap_cfg.mask_blend_scale = p.mask_blend_scale if hasattr(p, 'mask_blend_scale') else None + self.model_wrap_cfg.mask_blend_offset = p.mask_blend_offset if hasattr(p, 'mask_blend_offset') else None self.model_wrap_cfg.step = 0 self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None) self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0) diff --git a/modules/ui.py b/modules/ui.py index 579bab98..86c13086 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -732,6 +732,9 @@ def create_ui(): with FormRow(): mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur") mask_alpha = gr.Slider(label="Mask transparency", visible=False, elem_id="img2img_mask_alpha") + mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power") + mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_scale") + mask_blend_offset = gr.Slider(label='Mask blend offset', minimum=-4, maximum=4, step=0.1, value=0, elem_id="img2img_mask_blend_offset") with FormRow(): 
inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode") @@ -781,6 +784,9 @@ def create_ui(): sampler_name, mask_blur, mask_alpha, + mask_blend_power, + mask_blend_scale, + mask_blend_offset, inpainting_fill, batch_count, batch_size, @@ -879,6 +885,9 @@ def create_ui(): (toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update()), (denoising_strength, "Denoising strength"), (mask_blur, "Mask blur"), + (mask_blend_power, "Mask blend power"), + (mask_blend_scale, "Mask blend scale"), + (mask_blend_offset, "Mask blend offset"), *scripts.scripts_img2img.infotext_fields ] parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings) diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py index c98ab480..6aa97edf 100644 --- a/scripts/outpainting_mk_2.py +++ b/scripts/outpainting_mk_2.py @@ -133,13 +133,16 @@ class Script(scripts.Script): pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels")) mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8, elem_id=self.elem_id("mask_blur")) + mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) + mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale")) + mask_blend_offset = gr.Slider(label='Mask blend scale', minimum=-4, maximum=4, step=0.1, value=1, elem_id=self.elem_id("mask_blend_offset")) direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction")) noise_q = gr.Slider(label="Fall-off exponent (lower=higher detail)", minimum=0.0, maximum=4.0, 
step=0.01, value=1.0, elem_id=self.elem_id("noise_q")) color_variation = gr.Slider(label="Color variation", minimum=0.0, maximum=1.0, step=0.01, value=0.05, elem_id=self.elem_id("color_variation")) - return [info, pixels, mask_blur, direction, noise_q, color_variation] + return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, direction, noise_q, color_variation] - def run(self, p, _, pixels, mask_blur, direction, noise_q, color_variation): + def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, direction, noise_q, color_variation): initial_seed_and_info = [None, None] process_width = p.width @@ -167,6 +170,9 @@ class Script(scripts.Script): p.mask_blur_x = mask_blur_x*4 p.mask_blur_y = mask_blur_y*4 + p.mask_blend_power = mask_blend_power + p.mask_blend_scale = mask_blend_scale + p.mask_blend_offset = mask_blend_offset init_img = p.init_images[0] target_w = math.ceil((init_img.width + left + right) / 64) * 64 diff --git a/scripts/poor_mans_outpainting.py b/scripts/poor_mans_outpainting.py index ea0632b6..b10140f1 100644 --- a/scripts/poor_mans_outpainting.py +++ b/scripts/poor_mans_outpainting.py @@ -22,16 +22,23 @@ class Script(scripts.Script): pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels")) mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id=self.elem_id("mask_blur")) + mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) + mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale")) + mask_blend_offset = gr.Slider(label='Mask blend offset', minimum=-4, maximum=4, step=0.1, value=0, elem_id=self.elem_id("mask_blend_offset")) inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent 
nothing'], value='fill', type="index", elem_id=self.elem_id("inpainting_fill")) direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction")) - return [pixels, mask_blur, inpainting_fill, direction] + return [pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, inpainting_fill, direction] - def run(self, p, pixels, mask_blur, inpainting_fill, direction): + def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, inpainting_fill, direction): initial_seed = None initial_info = None p.mask_blur = mask_blur * 2 + p.mask_blend_power = mask_blend_power + p.mask_blend_scale = mask_blend_scale + p.mask_blend_offset = mask_blend_offset + p.inpainting_fill = inpainting_fill p.inpaint_full_res = False diff --git a/test/test_img2img.py b/test/test_img2img.py index 117d2d1e..6289e59e 100644 --- a/test/test_img2img.py +++ b/test/test_img2img.py @@ -24,6 +24,9 @@ def simple_img2img_request(img2img_basic_image_base64): "inpainting_mask_invert": False, "mask": None, "mask_blur": 4, + "mask_blend_power": 1, + "mask_blend_scale": 1, + "mask_blend_offset": 0, "n_iter": 1, "negative_prompt": "", "override_settings": {}, -- cgit v1.2.3 From c5c7fa06aae1ae9f8b6d29ae2da3874921d4729b Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 22:35:07 -0700 Subject: Added slider for detail preservation strength, removed largely needless offset parameter, changed labels in UI and for saving to/pasting data from PNG files. 
--- modules/img2img.py | 10 +++++----- modules/processing.py | 2 +- modules/sd_samplers_cfg_denoiser.py | 11 +++++------ modules/sd_samplers_common.py | 2 +- modules/ui.py | 14 +++++++------- scripts/outpainting_mk_2.py | 12 ++++++------ scripts/poor_mans_outpainting.py | 12 ++++++------ test/test_img2img.py | 2 +- 8 files changed, 32 insertions(+), 33 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/img2img.py b/modules/img2img.py index 240d0588..023808d6 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -134,7 +134,7 @@ def img2img(id_task: str, mask_alpha: float, mask_blend_power: float, mask_blend_scale: float, - mask_blend_offset: float, + inpaint_detail_preservation: float, inpainting_fill: int, n_iter: int, batch_size: int, @@ -216,7 +216,7 @@ def img2img(id_task: str, mask_blur=mask_blur, mask_blend_power=mask_blend_power, mask_blend_scale=mask_blend_scale, - mask_blend_offset=mask_blend_offset, + inpaint_detail_preservation=inpaint_detail_preservation, inpainting_fill=inpainting_fill, resize_mode=resize_mode, denoising_strength=denoising_strength, @@ -237,9 +237,9 @@ def img2img(id_task: str, if mask: p.extra_generation_params["Mask blur"] = mask_blur - p.extra_generation_params["Mask blend power"] = mask_blend_power - p.extra_generation_params["Mask blend scale"] = mask_blend_scale - p.extra_generation_params["Mask blend offset"] = mask_blend_offset + p.extra_generation_params["Mask blending bias"] = mask_blend_power + p.extra_generation_params["Mask blending preservation"] = mask_blend_scale + p.extra_generation_params["Mask blending detail boost"] = inpaint_detail_preservation with closing(p): if is_batch: diff --git a/modules/processing.py b/modules/processing.py index da4d6fda..361e8b05 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1351,7 +1351,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask_blur: int = None mask_blend_power: float = 1 mask_blend_scale: float = 1 - 
mask_blend_offset: float = 0 + inpaint_detail_preservation: float = 16 inpainting_fill: int = 0 inpaint_full_res: bool = True inpaint_full_res_padding: int = 0 diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index c4d6fda6..598cd487 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -45,7 +45,7 @@ class CFGDenoiser(torch.nn.Module): self.nmask = None self.mask_blend_power = 1 self.mask_blend_scale = 1 - self.mask_blend_offset = 0 + self.inpaint_detail_preservation = 16 self.init_latent = None self.steps = None """number of steps as specified by user in UI""" @@ -105,14 +105,13 @@ class CFGDenoiser(torch.nn.Module): # Record the original latent vector magnitudes. # We bring them to a power so that larger magnitudes are favored over smaller ones. # 64-bit operations are used here to allow large exponents. - detail_preservation = 32 - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** detail_preservation - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** detail_preservation + a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation one_minus_t = 1 - t # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / detail_preservation) + interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / self.inpaint_detail_preservation) # Linearly interpolate the image vectors. 
image_interp = a * one_minus_t + b * t @@ -142,7 +141,7 @@ class CFGDenoiser(torch.nn.Module): NOTE: "mask" is not used """ - return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale + self.mask_blend_offset) + return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale) if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 8904da2f..ecd8ab0a 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -279,7 +279,7 @@ class Sampler: self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None self.model_wrap_cfg.mask_blend_power = p.mask_blend_power if hasattr(p, 'mask_blend_power') else None self.model_wrap_cfg.mask_blend_scale = p.mask_blend_scale if hasattr(p, 'mask_blend_scale') else None - self.model_wrap_cfg.mask_blend_offset = p.mask_blend_offset if hasattr(p, 'mask_blend_offset') else None + self.model_wrap_cfg.inpaint_detail_preservation = p.inpaint_detail_preservation if hasattr(p, 'inpaint_detail_preservation') else None self.model_wrap_cfg.step = 0 self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None) self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0) diff --git a/modules/ui.py b/modules/ui.py index 86c13086..f5e20147 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -732,9 +732,9 @@ def create_ui(): with FormRow(): mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur") mask_alpha = gr.Slider(label="Mask transparency", visible=False, elem_id="img2img_mask_alpha") - mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power") - mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_scale") - mask_blend_offset = 
gr.Slider(label='Mask blend offset', minimum=-4, maximum=4, step=0.1, value=0, elem_id="img2img_mask_blend_offset") + mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power") + mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=1, elem_id="img2img_mask_blend_scale") + inpaint_detail_preservation = gr.Slider(label='Blending detail boost', minimum=1, maximum=32, step=0.5, value=16, elem_id="img2img_mask_blend_offset") with FormRow(): inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode") @@ -786,7 +786,7 @@ def create_ui(): mask_alpha, mask_blend_power, mask_blend_scale, - mask_blend_offset, + inpaint_detail_preservation, inpainting_fill, batch_count, batch_size, @@ -885,9 +885,9 @@ def create_ui(): (toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update()), (denoising_strength, "Denoising strength"), (mask_blur, "Mask blur"), - (mask_blend_power, "Mask blend power"), - (mask_blend_scale, "Mask blend scale"), - (mask_blend_offset, "Mask blend offset"), + (mask_blend_power, "Mask blending bias"), + (mask_blend_scale, "Mask blending preservation"), + (inpaint_detail_preservation, "Mask blending detail boost"), *scripts.scripts_img2img.infotext_fields ] parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings) diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py index 6aa97edf..54d95825 100644 --- a/scripts/outpainting_mk_2.py +++ b/scripts/outpainting_mk_2.py @@ -133,16 +133,16 @@ class Script(scripts.Script): pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels")) mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8, 
elem_id=self.elem_id("mask_blur")) - mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) - mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale")) - mask_blend_offset = gr.Slider(label='Mask blend scale', minimum=-4, maximum=4, step=0.1, value=1, elem_id=self.elem_id("mask_blend_offset")) + mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) + mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale")) + inpaint_detail_preservation = gr.Slider(label='Blending detail boost', minimum=1, maximum=32, step=0.5, value=16, elem_id=self.elem_id("inpaint_detail_preservation")) direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction")) noise_q = gr.Slider(label="Fall-off exponent (lower=higher detail)", minimum=0.0, maximum=4.0, step=0.01, value=1.0, elem_id=self.elem_id("noise_q")) color_variation = gr.Slider(label="Color variation", minimum=0.0, maximum=1.0, step=0.01, value=0.05, elem_id=self.elem_id("color_variation")) - return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, direction, noise_q, color_variation] + return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation] - def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, direction, noise_q, color_variation): + def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation): initial_seed_and_info = [None, None] process_width = p.width @@ -172,7 +172,7 @@ 
class Script(scripts.Script): p.mask_blur_y = mask_blur_y*4 p.mask_blend_power = mask_blend_power p.mask_blend_scale = mask_blend_scale - p.mask_blend_offset = mask_blend_offset + p.inpaint_detail_preservation = inpaint_detail_preservation init_img = p.init_images[0] target_w = math.ceil((init_img.width + left + right) / 64) * 64 diff --git a/scripts/poor_mans_outpainting.py b/scripts/poor_mans_outpainting.py index b10140f1..e3acb3d4 100644 --- a/scripts/poor_mans_outpainting.py +++ b/scripts/poor_mans_outpainting.py @@ -22,22 +22,22 @@ class Script(scripts.Script): pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels")) mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id=self.elem_id("mask_blur")) - mask_blend_power = gr.Slider(label='Mask blend power', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) - mask_blend_scale = gr.Slider(label='Mask blend scale', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale")) - mask_blend_offset = gr.Slider(label='Mask blend offset', minimum=-4, maximum=4, step=0.1, value=0, elem_id=self.elem_id("mask_blend_offset")) + mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) + mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_scale")) + inpaint_detail_preservation = gr.Slider(label='Blending detail boost', minimum=1, maximum=32, step=0.5, value=16, elem_id=self.elem_id("inpaint_detail_preservation")) inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='fill', type="index", elem_id=self.elem_id("inpainting_fill")) direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 
'down'], elem_id=self.elem_id("direction")) - return [pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, inpainting_fill, direction] + return [pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction] - def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, mask_blend_offset, inpainting_fill, direction): + def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction): initial_seed = None initial_info = None p.mask_blur = mask_blur * 2 p.mask_blend_power = mask_blend_power p.mask_blend_scale = mask_blend_scale - p.mask_blend_offset = mask_blend_offset + p.inpaint_detail_preservation = inpaint_detail_preservation p.inpainting_fill = inpainting_fill p.inpaint_full_res = False diff --git a/test/test_img2img.py b/test/test_img2img.py index 6289e59e..88b06eb8 100644 --- a/test/test_img2img.py +++ b/test/test_img2img.py @@ -26,7 +26,7 @@ def simple_img2img_request(img2img_basic_image_base64): "mask_blur": 4, "mask_blend_power": 1, "mask_blend_scale": 1, - "mask_blend_offset": 0, + "inpaint_detail_preservation": 16, "n_iter": 1, "negative_prompt": "", "override_settings": {}, -- cgit v1.2.3 From c7a1ff87207544dd4bcf3aefffa67a4a38678c16 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 23:31:10 -0700 Subject: Tweaked default values. 
--- modules/processing.py | 4 ++-- modules/sd_samplers_cfg_denoiser.py | 4 ++-- test/test_img2img.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 361e8b05..92fdebad 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1350,8 +1350,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask_blur_y: int = 4 mask_blur: int = None mask_blend_power: float = 1 - mask_blend_scale: float = 1 - inpaint_detail_preservation: float = 16 + mask_blend_scale: float = 0.5 + inpaint_detail_preservation: float = 4 inpainting_fill: int = 0 inpaint_full_res: bool = True inpaint_full_res_padding: int = 0 diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 598cd487..ceb612d7 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -44,8 +44,8 @@ class CFGDenoiser(torch.nn.Module): self.mask = None self.nmask = None self.mask_blend_power = 1 - self.mask_blend_scale = 1 - self.inpaint_detail_preservation = 16 + self.mask_blend_scale = 0.5 + self.inpaint_detail_preservation = 4 self.init_latent = None self.steps = None """number of steps as specified by user in UI""" diff --git a/test/test_img2img.py b/test/test_img2img.py index 88b06eb8..5cda2dba 100644 --- a/test/test_img2img.py +++ b/test/test_img2img.py @@ -25,8 +25,8 @@ def simple_img2img_request(img2img_basic_image_base64): "mask": None, "mask_blur": 4, "mask_blend_power": 1, - "mask_blend_scale": 1, - "inpaint_detail_preservation": 16, + "mask_blend_scale": 0.5, + "inpaint_detail_preservation": 4, "n_iter": 1, "negative_prompt": "", "override_settings": {}, -- cgit v1.2.3 From b58d061e41cba6fb91910d310d53e175d0511650 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 2 Dec 2023 08:33:28 +0300 Subject: infotext updates: add option to disregard certain infotext fields, add option to not 
include VAE in infotext, add explanation to infotext settings page, move some options to infotext settings page --- modules/generation_parameters_copypaste.py | 13 +++++++++---- modules/processing.py | 4 ++-- modules/shared_items.py | 16 ++++++++++++++++ modules/shared_options.py | 20 ++++++++++++++------ 4 files changed, 41 insertions(+), 12 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py index 0a606515..4efe53e0 100644 --- a/modules/generation_parameters_copypaste.py +++ b/modules/generation_parameters_copypaste.py @@ -1,3 +1,4 @@ +from __future__ import annotations import base64 import io import json @@ -15,9 +16,6 @@ re_imagesize = re.compile(r"^(\d+)x(\d+)$") re_hypernet_hash = re.compile("\(([0-9a-f]+)\)$") type_of_gr_update = type(gr.update()) -paste_fields = {} -registered_param_bindings = [] - class ParamBinding: def __init__(self, paste_button, tabname, source_text_component=None, source_image_component=None, source_tabname=None, override_settings_component=None, paste_field_names=None): @@ -30,6 +28,10 @@ class ParamBinding: self.paste_field_names = paste_field_names or [] +paste_fields: dict[str, dict] = {} +registered_param_bindings: list[ParamBinding] = [] + + def reset(): paste_fields.clear() registered_param_bindings.clear() @@ -113,7 +115,6 @@ def register_paste_params_button(binding: ParamBinding): def connect_paste_params_buttons(): - binding: ParamBinding for binding in registered_param_bindings: destination_image_component = paste_fields[binding.tabname]["init_img"] fields = paste_fields[binding.tabname]["fields"] @@ -313,6 +314,9 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model if "VAE Decoder" not in res: res["VAE Decoder"] = "Full" + skip = set(shared.opts.infotext_skip_pasting) + res = {k: v for k, v in res.items() if k not in skip} + return res @@ -443,3 +447,4 @@ def connect_paste(button, paste_fields, 
input_comp, override_settings_component, outputs=[], show_progress=False, ) + diff --git a/modules/processing.py b/modules/processing.py index ac58ef86..5ab6ddde 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -679,8 +679,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Size": f"{p.width}x{p.height}", "Model hash": p.sd_model_hash if opts.add_model_hash_to_info else None, "Model": p.sd_model_name if opts.add_model_name_to_info else None, - "VAE hash": p.sd_vae_hash if opts.add_model_hash_to_info else None, - "VAE": p.sd_vae_name if opts.add_model_name_to_info else None, + "VAE hash": p.sd_vae_hash if opts.add_vae_hash_to_info else None, + "VAE": p.sd_vae_name if opts.add_vae_name_to_info else None, "Variation seed": (None if p.subseed_strength == 0 else (p.all_subseeds[0] if use_main_prompt else all_subseeds[index])), "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength), "Seed resize from": (None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), diff --git a/modules/shared_items.py b/modules/shared_items.py index 5024b426..991971ad 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -66,6 +66,22 @@ def reload_hypernetworks(): shared.hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) +def get_infotext_names(): + from modules import generation_parameters_copypaste, shared + res = {} + + for info in shared.opts.data_labels.values(): + if info.infotext: + res[info.infotext] = 1 + + for tab_data in generation_parameters_copypaste.paste_fields.values(): + for _, name in tab_data.get("fields") or []: + if isinstance(name, str): + res[name] = 1 + + return list(res) + + ui_reorder_categories_builtin_items = [ "prompt", "image", diff --git a/modules/shared_options.py b/modules/shared_options.py index 04e68a71..df45fc0a 100644 --- a/modules/shared_options.py +++ 
b/modules/shared_options.py @@ -46,8 +46,6 @@ options_templates.update(options_section(('saving-images', "Saving images/grids" "grid_text_inactive_color": OptionInfo("#999999", "Inactive text color for image grids", ui_components.FormColorPicker, {}), "grid_background_color": OptionInfo("#ffffff", "Background color for image grids", ui_components.FormColorPicker, {}), - "enable_pnginfo": OptionInfo(True, "Save text information about generation parameters as chunks to png files"), - "save_txt": OptionInfo(False, "Create a text file next to every image with generation parameters."), "save_images_before_face_restoration": OptionInfo(False, "Save a copy of image before doing face restoration."), "save_images_before_highres_fix": OptionInfo(False, "Save a copy of image before applying highres fix."), "save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"), @@ -288,11 +286,21 @@ options_templates.update(options_section(('ui', "User interface", "ui"), { options_templates.update(options_section(('infotext', "Infotext", "ui"), { - "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"), - "add_model_name_to_info": OptionInfo(True, "Add model name to generation information"), - "add_user_name_to_info": OptionInfo(False, "Add user name to generation information when authenticated"), - "add_version_to_infotext": OptionInfo(True, "Add program version to generation information"), + "infotext_explanation": OptionHTML(""" +Infotext is what this software calls the text that contains generation parameters and can be used to generate the same picture again. +It is displayed in UI below the image. To use infotext, paste it into the prompt and click the ↙️ paste button. 
+"""), + "enable_pnginfo": OptionInfo(True, "Write infotext to metadata of the generated image"), + "save_txt": OptionInfo(False, "Create a text file with infotext next to every generated image"), + + "add_model_name_to_info": OptionInfo(True, "Add model name to infotext"), + "add_model_hash_to_info": OptionInfo(True, "Add model hash to infotext"), + "add_vae_name_to_info": OptionInfo(True, "Add VAE name to infotext"), + "add_vae_hash_to_info": OptionInfo(True, "Add VAE hash to infotext"), + "add_user_name_to_info": OptionInfo(False, "Add user name to infotext when authenticated"), + "add_version_to_infotext": OptionInfo(True, "Add program version to infotext"), "disable_weights_auto_swap": OptionInfo(True, "Disregard checkpoint information from pasted infotext").info("when reading generation parameters from text into UI"), + "infotext_skip_pasting": OptionInfo([], "Disregard fields from pasted infotext", ui_components.DropdownMulti, lambda: {"choices": shared_items.get_infotext_names()}), "infotext_styles": OptionInfo("Apply if any", "Infer styles from prompts of pasted infotext", gr.Radio, {"choices": ["Ignore", "Apply", "Discard", "Apply if any"]}).info("when reading generation parameters from text into UI)").html("""
  • Ignore: keep prompt and styles dropdown as it is.
  • Apply: remove style text from prompt, always replace styles dropdown value with found styles (even if none are found).
  • -- cgit v1.2.3 From 83e8c322762c545fd589c060811379582926060f Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Sat, 2 Dec 2023 13:30:53 -0500 Subject: Fix `save_samples` being checked early when saving masked composite --- modules/processing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 5ab6ddde..4f265801 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -938,14 +938,14 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if opts.enable_pnginfo: image.info["parameters"] = text output_images.append(image) - if save_samples and hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): + if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): image_mask = p.mask_for_overlay.convert('RGB') image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') - if opts.save_mask: + if save_samples and opts.save_mask: images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask") - if opts.save_mask_composite: + if save_samples and opts.save_mask_composite: images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite") if opts.return_mask: -- cgit v1.2.3 From 73ab982d1b7394574d1cf2e0a151bc457eeed769 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Sat, 2 Dec 2023 21:07:02 -0700 Subject: Blend masks are now produced afterward, based on an estimate of the visual difference 
between the original and modified latent images. This should remove ghosting and clipping artifacts from masks, while preserving the details of largely unchanged content. --- modules/processing.py | 119 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 29 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 92fdebad..ad716e11 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field import torch import numpy as np -from PIL import Image, ImageOps +from PIL import Image, ImageOps, ImageFilter import random import cv2 from skimage import exposure @@ -62,6 +62,16 @@ def apply_color_correction(correction, original_image): return image.convert('RGB') +def uncrop(image, dest_size, paste_loc): + x, y, w, h = paste_loc + base_image = Image.new('RGBA', dest_size) + image = images.resize_image(1, image, w, h) + base_image.paste(image, (x, y)) + image = base_image + + return image + + def apply_overlay(image, paste_loc, index, overlays): if overlays is None or index >= len(overlays): return image @@ -69,11 +79,7 @@ def apply_overlay(image, paste_loc, index, overlays): overlay = overlays[index] if paste_loc is not None: - x, y, w, h = paste_loc - base_image = Image.new('RGBA', (overlay.width, overlay.height)) - image = images.resize_image(1, image, w, h) - base_image.paste(image, (x, y)) - image = base_image + image = uncrop(image, (overlay.width, overlay.height), paste_loc) image = image.convert('RGBA') image.alpha_composite(overlay) @@ -140,6 +146,7 @@ class StableDiffusionProcessing: do_not_save_grid: bool = False extra_generation_params: dict[str, Any] = None overlay_images: list = None + masks_for_overlay: list = None eta: float = None do_not_reload_embeddings: bool = False denoising_strength: float = 0 @@ -865,11 +872,66 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if 
getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim + # todo: generate masks the old fashioned way else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) + # Generate the mask(s) based on similarity between the original and denoised latent vectors + if getattr(p, "image_mask", None) is not None: + # latent_mask = p.nmask[0].float().cpu() + + # convert the original mask into a form we use to scale distances for thresholding + # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2)) + # mask_scalar = mask_scalar / (1.00001-mask_scalar) + # mask_scalar = mask_scalar.numpy() + + latent_orig = p.init_latent + latent_proc = samples_ddim + latent_distance = torch.norm(latent_proc - latent_orig, p=2, dim=1) + + kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2) + + for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, p.overlay_images)): + converted_mask = distance_map.float().cpu().numpy() + converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.9, percentile_max=1, min_width=1) + converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.25, percentile_max=0.75, min_width=1) + + # The distance at which opacity of original decreases to 50% + # half_weighted_distance = 1 # * mask_scalar + # converted_mask = converted_mask / half_weighted_distance + + converted_mask = 1 / (1 + converted_mask ** 2) + converted_mask = images.smootherstep(converted_mask) + converted_mask = 1 - converted_mask + converted_mask = 255. 
* converted_mask + converted_mask = converted_mask.astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, p.width, p.height) + converted_mask = create_binary_mask(converted_mask) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. + if p.paste_to is not None: + converted_mask = uncrop(converted_mask, + (overlay_image.width, overlay_image.height), + p.paste_to) + + p.masks_for_overlay[i] = converted_mask + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + p.overlay_images[i] = image_masked.convert('RGBA') + + x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, + target_device=devices.cpu, + check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) @@ -892,7 +954,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: x_samples_ddim = batch_params.images def infotext(index=0, use_main_prompt=False): - return create_infotext(p, p.prompts, p.seeds, p.subseeds, use_main_prompt=use_main_prompt, index=index, all_negative_prompts=p.negative_prompts) + return create_infotext(p, p.prompts, p.seeds, p.subseeds, + use_main_prompt=use_main_prompt, index=index, + all_negative_prompts=p.negative_prompts) save_samples = p.save_samples() @@ -923,19 +987,27 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-before-color-correction") image = apply_color_correction(p.color_corrections[i], image) + # If the intention is to show the output from the model + 
# that is being composited over the original image, + # we need to keep the original image around + # and use it in the composite step. + original_denoised_image = image.copy() image = apply_overlay(image, p.paste_to, i, p.overlay_images) if save_samples: - images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p) + images.save_image(image, p.outpath_samples, "", p.seeds[i], + p.prompts[i], opts.samples_format, info=infotext(i), p=p) text = infotext(i) infotexts.append(text) if opts.enable_pnginfo: image.info["parameters"] = text output_images.append(image) - if save_samples and hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): - image_mask = p.mask_for_overlay.convert('RGB') - image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') + if save_samples and hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): + image_mask = p.masks_for_overlay[i].convert('RGB') + image_mask_composite = Image.composite( + original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), + images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA') if opts.save_mask: images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask") @@ -1364,7 +1436,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): nmask: torch.Tensor = field(default=None, init=False) image_conditioning: torch.Tensor = field(default=None, init=False) init_img_hash: str = field(default=None, init=False) - mask_for_overlay: Image = field(default=None, 
init=False) init_latent: torch.Tensor = field(default=None, init=False) def __post_init__(self): @@ -1415,12 +1486,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image_mask = Image.fromarray(np_mask) if self.inpaint_full_res: - np_mask = np.array(image_mask).astype(np.float32) - np_mask /= 255 - np_mask = 1-pow(1-np_mask, 100) - np_mask *= 255 - np_mask = np.clip(np_mask, 0, 255).astype(np.uint8) - self.mask_for_overlay = Image.fromarray(np_mask) mask = image_mask.convert('L') crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding) crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) @@ -1431,13 +1496,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.paste_to = (x1, y1, x2-x1, y2-y1) else: image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height) - np_mask = np.array(image_mask).astype(np.float32) - np_mask /= 255 - np_mask = 1-pow(1-np_mask, 100) - np_mask *= 255 - np_mask = np.clip(np_mask, 0, 255).astype(np.uint8) - self.mask_for_overlay = Image.fromarray(np_mask) + self.masks_for_overlay = [] self.overlay_images = [] latent_mask = self.latent_mask if self.latent_mask is not None else image_mask @@ -1459,10 +1519,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image = images.resize_image(self.resize_mode, image, self.width, self.height) if image_mask is not None: - image_masked = Image.new('RGBa', (image.width, image.height)) - image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) - - self.overlay_images.append(image_masked.convert('RGBA')) + self.overlay_images.append(image) + self.masks_for_overlay.append(image_mask) # crop_region is not None if we are doing inpaint full res if crop_region is not None: @@ -1486,6 +1544,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.overlay_images is 
not None: self.overlay_images = self.overlay_images * self.batch_size + if self.masks_for_overlay is not None: + self.masks_for_overlay = self.masks_for_overlay * self.batch_size + if self.color_corrections is not None and len(self.color_corrections) == 1: self.color_corrections = self.color_corrections * self.batch_size -- cgit v1.2.3 From d3fdc4af61b7560eede52290e1ede48185680089 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 3 Dec 2023 18:22:00 +0900 Subject: rework mask and mask_composite logic --- modules/processing.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 4f265801..6f01c95f 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -938,21 +938,20 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if opts.enable_pnginfo: image.info["parameters"] = text output_images.append(image) - if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): - image_mask = p.mask_for_overlay.convert('RGB') - image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') - - if save_samples and opts.save_mask: - images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask") - - if save_samples and opts.save_mask_composite: - images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite") - - if opts.return_mask: - output_images.append(image_mask) - - if opts.return_mask_composite: - output_images.append(image_mask_composite) + if hasattr(p, 
'mask_for_overlay') and p.mask_for_overlay: + if opts.return_mask or opts.save_mask: + image_mask = p.mask_for_overlay.convert('RGB') + if save_samples and opts.save_mask: + images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask") + if opts.return_mask: + output_images.append(image_mask) + + if opts.return_mask_composite or opts.save_mask_composite: + image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') + if save_samples and opts.save_mask_composite: + images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite") + if opts.return_mask_composite: + output_images.append(image_mask_composite) del x_samples_ddim -- cgit v1.2.3 From 552f8bc832cd21ee0338e08b6a701687d0d79fad Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Sun, 3 Dec 2023 14:49:41 -0700 Subject: "Uncrop" the original denoised image for the composite step, fixing a "ValueError: Images do not match" *shudder* --- modules/processing.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 66aaab83..cd7216f8 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -994,6 +994,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: # we need to keep the original image around # and use it in the composite step. 
original_denoised_image = image.copy() + + if p.paste_to is not None: + original_denoised_image = uncrop(original_denoised_image, (p.overlay_images[i].width, p.overlay_images[i].height), p.paste_to) + image = apply_overlay(image, p.paste_to, i, p.overlay_images) if save_samples: -- cgit v1.2.3 From aaacf4823241450d88315af9d465d6815119fe0d Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 01:27:22 -0700 Subject: Organized the settings and UI of soft inpainting to allow for toggling the feature, and centralizes default values to reduce the amount of copy-pasta. --- modules/img2img.py | 14 ++-- modules/processing.py | 5 +- modules/sd_samplers_cfg_denoiser.py | 35 +++++++--- modules/sd_samplers_common.py | 4 +- modules/soft_inpainting.py | 133 ++++++++++++++++++++++++++++++++++++ modules/ui.py | 17 +++-- scripts/outpainting_mk_2.py | 15 ++-- scripts/poor_mans_outpainting.py | 15 ++-- test/test_img2img.py | 8 ++- 9 files changed, 197 insertions(+), 49 deletions(-) create mode 100644 modules/soft_inpainting.py (limited to 'modules/processing.py') diff --git a/modules/img2img.py b/modules/img2img.py index 596f741c..3aa8a9ce 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -15,6 +15,7 @@ import modules.shared as shared import modules.processing as processing from modules.ui import plaintext_to_html import modules.scripts +import modules.soft_inpainting as si def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=False, scale_by=1.0, use_png_info=False, png_info_props=None, png_info_dir=None): @@ -162,6 +163,7 @@ def img2img(id_task: str, sampler_name: str, mask_blur: int, mask_alpha: float, + mask_blend_enabled: bool, mask_blend_power: float, mask_blend_scale: float, inpaint_detail_preservation: float, @@ -227,6 +229,9 @@ def img2img(id_task: str, assert 0. 
<= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]' + soft_inpainting = si.SoftInpaintingSettings(mask_blend_power, mask_blend_scale, inpaint_detail_preservation) \ + if mask_blend_enabled else None + p = StableDiffusionProcessingImg2Img( sd_model=shared.sd_model, outpath_samples=opts.outdir_samples or opts.outdir_img2img_samples, @@ -244,9 +249,7 @@ def img2img(id_task: str, init_images=[image], mask=mask, mask_blur=mask_blur, - mask_blend_power=mask_blend_power, - mask_blend_scale=mask_blend_scale, - inpaint_detail_preservation=inpaint_detail_preservation, + soft_inpainting=soft_inpainting, inpainting_fill=inpainting_fill, resize_mode=resize_mode, denoising_strength=denoising_strength, @@ -267,9 +270,8 @@ def img2img(id_task: str, if mask: p.extra_generation_params["Mask blur"] = mask_blur - p.extra_generation_params["Mask blending bias"] = mask_blend_power - p.extra_generation_params["Mask blending preservation"] = mask_blend_scale - p.extra_generation_params["Mask blending contrast boost"] = inpaint_detail_preservation + if soft_inpainting is not None: + soft_inpainting.add_generation_params(p.extra_generation_params) with closing(p): if is_batch: diff --git a/modules/processing.py b/modules/processing.py index cd7216f8..b209c84a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -30,6 +30,7 @@ import modules.sd_models as sd_models import modules.sd_vae as sd_vae from ldm.data.util import AddMiDaS from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion +import modules.soft_inpainting as si from einops import repeat, rearrange from blendmodes.blend import blendLayers, BlendType @@ -1425,9 +1426,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask_blur_x: int = 4 mask_blur_y: int = 4 mask_blur: int = None - mask_blend_power: float = 1 - mask_blend_scale: float = 0.5 - inpaint_detail_preservation: float = 4 + soft_inpainting: si.SoftInpaintingParameters = si.default inpainting_fill: int = 0 
inpaint_full_res: bool = True inpaint_full_res_padding: int = 0 diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index efbe7a40..0ee0b7dd 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -6,6 +6,7 @@ import modules.shared as shared from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback +import modules.soft_inpainting as si def catenate_conds(conds): @@ -43,9 +44,7 @@ class CFGDenoiser(torch.nn.Module): self.model_wrap = None self.mask = None self.nmask = None - self.mask_blend_power = 1 - self.mask_blend_scale = 0.5 - self.inpaint_detail_preservation = 4 + self.soft_inpainting: si.SoftInpaintingParameters = None self.init_latent = None self.steps = None """number of steps as specified by user in UI""" @@ -95,7 +94,8 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['uncond'] = uc def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): - def latent_blend(a, b, t): + def latent_blend(a, b, t, one_minus_t=None): + """ Interpolates two latent image representations according to the parameter t, where the interpolated vectors' magnitudes are also interpolated separately. @@ -104,7 +104,11 @@ class CFGDenoiser(torch.nn.Module): """ # NOTE: We use inplace operations wherever possible. - one_minus_t = 1 - t + if one_minus_t is None: + one_minus_t = 1 - t + + if self.soft_inpainting is None: + return a * one_minus_t + b * t # Linearly interpolate the image vectors. a_scaled = a * one_minus_t @@ -119,10 +123,10 @@ class CFGDenoiser(torch.nn.Module): current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). 
- a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * one_minus_t - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * t + a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * one_minus_t + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * t desired_magnitude = a_magnitude - desired_magnitude.add_(b_magnitude).pow_(1 / self.inpaint_detail_preservation) + desired_magnitude.add_(b_magnitude).pow_(1 / self.soft_inpainting.inpaint_detail_preservation) del a_magnitude, b_magnitude, one_minus_t # Change the linearly interpolated image vectors' magnitudes to the value we want. @@ -156,7 +160,10 @@ class CFGDenoiser(torch.nn.Module): NOTE: "mask" is not used """ - return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale) + if self.soft_inpainting is None: + return nmask + + return torch.pow(nmask, (_sigma ** self.soft_inpainting.mask_blend_power) * self.soft_inpainting.mask_blend_scale) if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -176,7 +183,10 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: - x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) + if self.soft_inpainting is None: + x = latent_blend(self.init_latent, x, self.nmask, self.mask) + else: + x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] @@ -279,7 +289,10 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: - denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) + if 
self.soft_inpainting is None: + denoised = latent_blend(self.init_latent, denoised, self.nmask, self.mask) + else: + denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma) diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index ecd8ab0a..9682bee3 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -277,9 +277,7 @@ class Sampler: self.model_wrap_cfg.p = p self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None - self.model_wrap_cfg.mask_blend_power = p.mask_blend_power if hasattr(p, 'mask_blend_power') else None - self.model_wrap_cfg.mask_blend_scale = p.mask_blend_scale if hasattr(p, 'mask_blend_scale') else None - self.model_wrap_cfg.inpaint_detail_preservation = p.inpaint_detail_preservation if hasattr(p, 'inpaint_detail_preservation') else None + self.model_wrap_cfg.soft_inpainting = p.soft_inpainting if hasattr(p, 'soft_inpainting') else None self.model_wrap_cfg.step = 0 self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None) self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0) diff --git a/modules/soft_inpainting.py b/modules/soft_inpainting.py new file mode 100644 index 00000000..259c36ec --- /dev/null +++ b/modules/soft_inpainting.py @@ -0,0 +1,133 @@ +class SoftInpaintingSettings: + def __init__(self, mask_blend_power, mask_blend_scale, inpaint_detail_preservation): + self.mask_blend_power = mask_blend_power + self.mask_blend_scale = mask_blend_scale + self.inpaint_detail_preservation = inpaint_detail_preservation + + def get_paste_fields(self): + return [ + (self.mask_blend_power, gen_param_labels.mask_blend_power), + (self.mask_blend_scale, 
gen_param_labels.mask_blend_scale), + (self.inpaint_detail_preservation, gen_param_labels.inpaint_detail_preservation), + ] + + def add_generation_params(self, dest): + dest[enabled_gen_param_label] = True + dest[gen_param_labels.mask_blend_power] = self.mask_blend_power + dest[gen_param_labels.mask_blend_scale] = self.mask_blend_scale + dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation + + +enabled_ui_label = "Soft inpainting" +enabled_gen_param_label = "Soft inpainting enabled" +enabled_el_id = "soft_inpainting_enabled" + +default = SoftInpaintingSettings(1, 0.5, 4) +ui_labels = SoftInpaintingSettings("Schedule bias", "Preservation strength", "Transition contrast boost") + +ui_info = SoftInpaintingSettings( + mask_blend_power="Shifts when preservation of original content occurs during denoising.", + # "Below 1: Stronger preservation near the end (with low sigma)\n" + # "1: Balanced (proportional to sigma)\n" + # "Above 1: Stronger preservation in the beginning (with high sigma)", + mask_blend_scale="How strongly partially masked content should be preserved.", + # "Low values: Favors generated content.\n" + # "High values: Favors original content.", + inpaint_detail_preservation="Amplifies the contrast that may be lost in partially masked regions.") + +gen_param_labels = SoftInpaintingSettings("Soft inpainting schedule bias", "Soft inpainting preservation strength", "Soft inpainting transition contrast boost") +el_ids = SoftInpaintingSettings("mask_blend_power", "mask_blend_scale", "inpaint_detail_preservation") + + +def gradio_ui(): + import gradio as gr + from modules.ui_components import InputAccordion + """ + with InputAccordion(False, label="Refiner", elem_id=self.elem_id("enable")) as enable_refiner: + with gr.Row(): + refiner_checkpoint = gr.Dropdown(label='Checkpoint', elem_id=self.elem_id("checkpoint"), choices=sd_models.checkpoint_tiles(), value='', tooltip="switch to another model in the middle of generation") + 
create_refresh_button(refiner_checkpoint, sd_models.list_models, lambda: {"choices": sd_models.checkpoint_tiles()}, self.elem_id("checkpoint_refresh")) + + refiner_switch_at = gr.Slider(value=0.8, label="Switch at", minimum=0.01, maximum=1.0, step=0.01, elem_id=self.elem_id("switch_at"), tooltip="fraction of sampling steps when the switch to refiner model should happen; 1=never, 0.5=switch in the middle of generation") + + """ + with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled: + with gr.Group(): + gr.Markdown( + """ + Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity. + **High _Mask blur_** values are recommended! + """) + + result = SoftInpaintingSettings( + gr.Slider(label=ui_labels.mask_blend_power, + info=ui_info.mask_blend_power, + minimum=0, + maximum=8, + step=0.1, + value=default.mask_blend_power, + elem_id=el_ids.mask_blend_power), + gr.Slider(label=ui_labels.mask_blend_scale, + info=ui_info.mask_blend_scale, + minimum=0, + maximum=8, + step=0.05, + value=default.mask_blend_scale, + elem_id=el_ids.mask_blend_scale), + gr.Slider(label=ui_labels.inpaint_detail_preservation, + info=ui_info.inpaint_detail_preservation, + minimum=1, + maximum=32, + step=0.5, + value=default.inpaint_detail_preservation, + elem_id=el_ids.inpaint_detail_preservation)) + + with gr.Accordion("Help", open=False): + gr.Markdown( + f""" + ### {ui_labels.mask_blend_power} + + The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas). + This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step. + This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation. 
+ + - **Below 1**: Stronger preservation near the end (with low sigma) + - **1**: Balanced (proportional to sigma) + - **Above 1**: Stronger preservation in the beginning (with high sigma) + """) + gr.Markdown( + f""" + ### {ui_labels.mask_blend_scale} + + Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content. + This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength. + + - **Low values**: Favors generated content. + - **High values**: Favors original content. + """) + gr.Markdown( + f""" + ### {ui_labels.inpaint_detail_preservation} + + This parameter controls how the original latent vectors and denoised latent vectors are interpolated. + With higher values, the magnitude of the resulting blended vector will be closer to the maximum of the two interpolated vectors. + This can prevent the loss of contrast that occurs with linear interpolation. + + - **Low values**: Softer blending, details may fade. + - **High values**: Stronger contrast, may over-saturate colors. 
+ """) + + return ( + [ + soft_inpainting_enabled, + result.mask_blend_power, + result.mask_blend_scale, + result.inpaint_detail_preservation + ], + [ + (soft_inpainting_enabled, enabled_gen_param_label), + (result.mask_blend_power, gen_param_labels.mask_blend_power), + (result.mask_blend_scale, gen_param_labels.mask_blend_scale), + (result.inpaint_detail_preservation, gen_param_labels.inpaint_detail_preservation) + ] + ) diff --git a/modules/ui.py b/modules/ui.py index b13ed66c..0e4fb17a 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -29,6 +29,7 @@ import modules.shared as shared from modules import prompt_parser from modules.sd_hijack import model_hijack from modules.generation_parameters_copypaste import image_from_url_text +import modules.soft_inpainting as si create_setting_component = ui_settings.create_setting_component @@ -678,9 +679,16 @@ def create_ui(): with FormRow(): mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur") mask_alpha = gr.Slider(label="Mask transparency", visible=False, elem_id="img2img_mask_alpha") + + with FormRow(): + soft_inpainting = si.gradio_ui() + + + """ mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power") mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id="img2img_mask_blend_scale") inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id="img2img_mask_blend_offset") + """ with FormRow(): inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode") @@ -736,9 +744,7 @@ def create_ui(): sampler_name, mask_blur, mask_alpha, - mask_blend_power, - mask_blend_scale, - inpaint_detail_preservation, + *(soft_inpainting[0]), inpainting_fill, batch_count, batch_size, @@ 
-837,11 +843,10 @@ def create_ui(): (toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update()), (denoising_strength, "Denoising strength"), (mask_blur, "Mask blur"), - (mask_blend_power, "Mask blending bias"), - (mask_blend_scale, "Mask blending preservation"), - (inpaint_detail_preservation, "Mask blending contrast boost"), + *(soft_inpainting[1]), *scripts.scripts_img2img.infotext_fields ] + parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings) parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields, override_settings) parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding( diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py index bd9cb61b..f7888688 100644 --- a/scripts/outpainting_mk_2.py +++ b/scripts/outpainting_mk_2.py @@ -10,6 +10,7 @@ from PIL import Image, ImageDraw from modules import images from modules.processing import Processed, process_images from modules.shared import opts, state +import modules.soft_inpainting as si # this function is taken from https://github.com/parlance-zz/g-diffuser-bot @@ -133,16 +134,14 @@ class Script(scripts.Script): pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels")) mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8, elem_id=self.elem_id("mask_blur")) - mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) - mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id=self.elem_id("mask_blend_scale")) - inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id=self.elem_id("inpaint_detail_preservation")) + soft_inpainting = si.gradio_ui()[0] direction 
= gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction")) noise_q = gr.Slider(label="Fall-off exponent (lower=higher detail)", minimum=0.0, maximum=4.0, step=0.01, value=1.0, elem_id=self.elem_id("noise_q")) color_variation = gr.Slider(label="Color variation", minimum=0.0, maximum=1.0, step=0.01, value=0.05, elem_id=self.elem_id("color_variation")) - return [info, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation] + return [info, pixels, mask_blur, *soft_inpainting, direction, noise_q, color_variation] - def run(self, p, _, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation): + def run(self, p, _, pixels, mask_blur, mask_blend_enabled, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, direction, noise_q, color_variation): initial_seed_and_info = [None, None] process_width = p.width @@ -170,9 +169,9 @@ class Script(scripts.Script): p.mask_blur_x = mask_blur_x*4 p.mask_blur_y = mask_blur_y*4 - p.mask_blend_power = mask_blend_power - p.mask_blend_scale = mask_blend_scale - p.inpaint_detail_preservation = inpaint_detail_preservation + + p.soft_inpainting = si.SoftInpaintingSettings(mask_blend_power, mask_blend_scale, inpaint_detail_preservation) \ + if mask_blend_enabled else None init_img = p.init_images[0] target_w = math.ceil((init_img.width + left + right) / 64) * 64 diff --git a/scripts/poor_mans_outpainting.py b/scripts/poor_mans_outpainting.py index 5388f5db..11f7f74a 100644 --- a/scripts/poor_mans_outpainting.py +++ b/scripts/poor_mans_outpainting.py @@ -7,6 +7,7 @@ from PIL import Image, ImageDraw from modules import images, devices from modules.processing import Processed, process_images from modules.shared import opts, state +import modules.soft_inpainting as si class Script(scripts.Script): @@ -22,23 
+23,19 @@ class Script(scripts.Script): pixels = gr.Slider(label="Pixels to expand", minimum=8, maximum=256, step=8, value=128, elem_id=self.elem_id("pixels")) mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id=self.elem_id("mask_blur")) - mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id=self.elem_id("mask_blend_power")) - mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id=self.elem_id("mask_blend_scale")) - inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id=self.elem_id("inpaint_detail_preservation")) + soft_inpainting = si.gradio_ui()[0] inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='fill', type="index", elem_id=self.elem_id("inpainting_fill")) direction = gr.CheckboxGroup(label="Outpainting direction", choices=['left', 'right', 'up', 'down'], value=['left', 'right', 'up', 'down'], elem_id=self.elem_id("direction")) - return [pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction] + return [pixels, mask_blur, *soft_inpainting, inpainting_fill, direction] - def run(self, p, pixels, mask_blur, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction): + def run(self, p, pixels, mask_blur, mask_blend_enabled, mask_blend_power, mask_blend_scale, inpaint_detail_preservation, inpainting_fill, direction): initial_seed = None initial_info = None p.mask_blur = mask_blur * 2 - p.mask_blend_power = mask_blend_power - p.mask_blend_scale = mask_blend_scale - p.inpaint_detail_preservation = inpaint_detail_preservation - + p.soft_inpainting = si.SoftInpaintingSettings(mask_blend_power, mask_blend_scale, inpaint_detail_preservation) \ + if mask_blend_enabled else None p.inpainting_fill = 
inpainting_fill p.inpaint_full_res = False diff --git a/test/test_img2img.py b/test/test_img2img.py index 5cda2dba..87bd8509 100644 --- a/test/test_img2img.py +++ b/test/test_img2img.py @@ -1,6 +1,7 @@ import pytest import requests +import modules.soft_inpainting as si @pytest.fixture() @@ -24,9 +25,10 @@ def simple_img2img_request(img2img_basic_image_base64): "inpainting_mask_invert": False, "mask": None, "mask_blur": 4, - "mask_blend_power": 1, - "mask_blend_scale": 0.5, - "inpaint_detail_preservation": 4, + "mask_blend_enabled": True, + "mask_blend_power": si.default.mask_blend_power, + "mask_blend_scale": si.default.mask_blend_scale, + "inpaint_detail_preservation": si.default.inpaint_detail_preservation, "n_iter": 1, "negative_prompt": "", "override_settings": {}, -- cgit v1.2.3 From 259d33c3c8e27557cb9bab9b3a1dd7fc7450d16c Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 01:57:21 -0700 Subject: Enables the original functionality to be toggled on and off. --- modules/processing.py | 99 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 29 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index b209c84a..b40b1a40 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -88,9 +88,12 @@ def apply_overlay(image, paste_loc, index, overlays): return image -def create_binary_mask(image): +def create_binary_mask(image, round=True): if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255): - image = image.split()[-1].convert("L") + if round: + image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0) + else: + image = image.split()[-1].convert("L") else: image = image.convert('L') return image @@ -316,7 +319,7 @@ class StableDiffusionProcessing: c_adm = torch.cat((c_adm, noise_level_emb), 1) return c_adm - def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None): + def 
inpainting_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True): self.is_using_inpainting_conditioning = True # Handle the different mask inputs @@ -327,6 +330,11 @@ class StableDiffusionProcessing: conditioning_mask = np.array(image_mask.convert("L")) conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + + if round_image_mask: + # Caller is requesting a discretized mask as input, so we round to either 1.0 or 0.0 + conditioning_mask = torch.round(conditioning_mask) + else: conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:]) @@ -350,7 +358,7 @@ class StableDiffusionProcessing: return image_conditioning - def img2img_image_conditioning(self, source_image, latent_image, image_mask=None): + def img2img_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True): source_image = devices.cond_cast_float(source_image) # HACK: Using introspection as the Depth2Image model doesn't appear to uniquely @@ -362,7 +370,10 @@ class StableDiffusionProcessing: return self.edit_image_conditioning(source_image) if self.sampler.conditioning_key in {'hybrid', 'concat'}: - return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) + return self.inpainting_image_conditioning(source_image, + latent_image, + image_mask=image_mask, + round_image_mask=round_image_mask) if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) @@ -878,8 +889,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method + # Generate the mask(s) based on similarity between the original and denoised latent vectors - if getattr(p, "image_mask", None) is not None: + if getattr(p, "image_mask", None) is not None and getattr(p, 
"soft_inpainting", None) is not None: # latent_mask = p.nmask[0].float().cpu() # convert the original mask into a form we use to scale distances for thresholding @@ -911,7 +923,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: converted_mask = converted_mask.astype(np.uint8) converted_mask = Image.fromarray(converted_mask) converted_mask = images.resize_image(2, converted_mask, p.width, p.height) - converted_mask = create_binary_mask(converted_mask) + converted_mask = create_binary_mask(converted_mask, round=False) # Remove aliasing artifacts using a gaussian blur. converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) @@ -1010,23 +1022,33 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if opts.enable_pnginfo: image.info["parameters"] = text output_images.append(image) - if save_samples and hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): - image_mask = p.masks_for_overlay[i].convert('RGB') - image_mask_composite = Image.composite( - original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), - images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA') - - if opts.save_mask: - images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask") - - if opts.save_mask_composite: - images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite") - - if opts.return_mask: - output_images.append(image_mask) - - if opts.return_mask_composite: - output_images.append(image_mask_composite) + if save_samples and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): + if hasattr(p, 'masks_for_overlay') and p.masks_for_overlay: + 
image_mask = p.masks_for_overlay[i].convert('RGB') + image_mask_composite = Image.composite( + original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), + images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA') + elif hasattr(p, 'mask_for_overlay') and p.mask_for_overlay: + image_mask = p.mask_for_overlay.convert('RGB') + image_mask_composite = Image.composite( + original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), + images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') + else: + image_mask = None + image_mask_composite = None + + if image_mask is not None and image_mask_composite is not None: + if opts.save_mask: + images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask") + + if opts.save_mask_composite: + images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite") + + if opts.return_mask: + output_images.append(image_mask) + + if opts.return_mask_composite: + output_images.append(image_mask_composite) del x_samples_ddim @@ -1439,6 +1461,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): nmask: torch.Tensor = field(default=None, init=False) image_conditioning: torch.Tensor = field(default=None, init=False) init_img_hash: str = field(default=None, init=False) + mask_for_overlay: Image = field(default=None, init=False) init_latent: torch.Tensor = field(default=None, init=False) def __post_init__(self): @@ -1471,7 +1494,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if image_mask is not None: # image_mask is passed in as RGBA by Gradio to support alpha masks, # but we still want to support binary masks. 
- image_mask = create_binary_mask(image_mask) + image_mask = create_binary_mask(image_mask, round=(self.soft_inpainting is None)) if self.inpainting_mask_invert: image_mask = ImageOps.invert(image_mask) @@ -1489,6 +1512,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image_mask = Image.fromarray(np_mask) if self.inpaint_full_res: + self.mask_for_overlay = image_mask if self.soft_inpainting is None else None mask = image_mask.convert('L') crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding) crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) @@ -1500,7 +1524,12 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): else: image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height) - self.masks_for_overlay = [] + if self.soft_inpainting is None: + np_mask = np.array(image_mask) + np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8) + self.mask_for_overlay = Image.fromarray(np_mask) + + self.masks_for_overlay = [] if self.soft_inpainting is not None else None self.overlay_images = [] latent_mask = self.latent_mask if self.latent_mask is not None else image_mask @@ -1522,8 +1551,15 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image = images.resize_image(self.resize_mode, image, self.width, self.height) if image_mask is not None: - self.overlay_images.append(image) - self.masks_for_overlay.append(image_mask) + if self.soft_inpainting is not None: + # We apply the masks AFTER to adjust mask based on changed content. 
+ self.overlay_images.append(image) + self.masks_for_overlay.append(image_mask) + else: + image_masked = Image.new('RGBa', (image.width, image.height)) + image_masked.paste(image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + self.overlay_images.append(image_masked.convert('RGBA')) # crop_region is not None if we are doing inpaint full res if crop_region is not None: @@ -1576,6 +1612,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 latmask = latmask[0] + if self.soft_inpainting is None: + latmask = np.around(latmask) latmask = np.tile(latmask[None], (4, 1, 1)) self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype) @@ -1587,7 +1625,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask - self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask) + self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, + self.init_latent, + image_mask, + self.soft_inpainting is None) def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): x = self.rng.next() -- cgit v1.2.3 From 976c1053efeb5054692ed3cfa294cf79196f3946 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 16:06:58 -0700 Subject: Cleaned up code, moved main code contributions into soft_inpainting.py --- modules/processing.py | 56 ++---------- modules/sd_samplers_cfg_denoiser.py | 84 ++--------------- modules/soft_inpainting.py | 177 ++++++++++++++++++++++++++++++++---- modules/ui.py | 7 -- 4 files changed, 174 insertions(+), 150 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py 
index b40b1a40..0b360387 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -892,55 +892,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: # Generate the mask(s) based on similarity between the original and denoised latent vectors if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None: - # latent_mask = p.nmask[0].float().cpu() - - # convert the original mask into a form we use to scale distances for thresholding - # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2)) - # mask_scalar = mask_scalar / (1.00001-mask_scalar) - # mask_scalar = mask_scalar.numpy() - - latent_orig = p.init_latent - latent_proc = samples_ddim - latent_distance = torch.norm(latent_proc - latent_orig, p=2, dim=1) - - kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2) - - for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, p.overlay_images)): - converted_mask = distance_map.float().cpu().numpy() - converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, - percentile_min=0.9, percentile_max=1, min_width=1) - converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, - percentile_min=0.25, percentile_max=0.75, min_width=1) - - # The distance at which opacity of original decreases to 50% - # half_weighted_distance = 1 # * mask_scalar - # converted_mask = converted_mask / half_weighted_distance - - converted_mask = 1 / (1 + converted_mask ** 2) - converted_mask = images.smootherstep(converted_mask) - converted_mask = 1 - converted_mask - converted_mask = 255. * converted_mask - converted_mask = converted_mask.astype(np.uint8) - converted_mask = Image.fromarray(converted_mask) - converted_mask = images.resize_image(2, converted_mask, p.width, p.height) - converted_mask = create_binary_mask(converted_mask, round=False) - - # Remove aliasing artifacts using a gaussian blur. 
- converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) - - # Expand the mask to fit the whole image if needed. - if p.paste_to is not None: - converted_mask = uncrop(converted_mask, - (overlay_image.width, overlay_image.height), - p.paste_to) - - p.masks_for_overlay[i] = converted_mask - - image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) - image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), - mask=ImageOps.invert(converted_mask.convert('L'))) - - p.overlay_images[i] = image_masked.convert('RGBA') + si.generate_adaptive_masks(latent_orig=p.init_latent, + latent_processed=samples_ddim, + overlay_images=p.overlay_images, + masks_for_overlay=p.masks_for_overlay, + width=p.width, + height=p.height, + paste_to=p.paste_to) x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 0ee0b7dd..a700e692 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -94,76 +94,6 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['uncond'] = uc def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): - def latent_blend(a, b, t, one_minus_t=None): - - """ - Interpolates two latent image representations according to the parameter t, - where the interpolated vectors' magnitudes are also interpolated separately. - The "detail_preservation" factor biases the magnitude interpolation towards - the larger of the two magnitudes. - """ - # NOTE: We use inplace operations wherever possible. - - if one_minus_t is None: - one_minus_t = 1 - t - - if self.soft_inpainting is None: - return a * one_minus_t + b * t - - # Linearly interpolate the image vectors. 
- a_scaled = a * one_minus_t - b_scaled = b * t - image_interp = a_scaled - image_interp.add_(b_scaled) - result_type = image_interp.dtype - del a_scaled, b_scaled - - # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) - # 64-bit operations are used here to allow large exponents. - current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) - - # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * one_minus_t - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * t - desired_magnitude = a_magnitude - desired_magnitude.add_(b_magnitude).pow_(1 / self.soft_inpainting.inpaint_detail_preservation) - del a_magnitude, b_magnitude, one_minus_t - - # Change the linearly interpolated image vectors' magnitudes to the value we want. - # This is the last 64-bit operation. - image_interp_scaling_factor = desired_magnitude - image_interp_scaling_factor.div_(current_magnitude) - image_interp_scaled = image_interp - image_interp_scaled.mul_(image_interp_scaling_factor) - del current_magnitude - del desired_magnitude - del image_interp - del image_interp_scaling_factor - - image_interp_scaled = image_interp_scaled.to(result_type) - del result_type - - return image_interp_scaled - - def get_modified_nmask(nmask, _sigma): - """ - Converts a negative mask representing the transparency of the original latent vectors being overlayed - to a mask that is scaled according to the denoising strength for this step. - - Where: - 0 = fully opaque, infinite density, fully masked - 1 = fully transparent, zero density, fully unmasked - - We bring this transparency to a power, as this allows one to simulate N number of blending operations - where N can be any positive real value. 
Using this one can control the balance of influence between - the denoiser and the original latents according to the sigma value. - - NOTE: "mask" is not used - """ - if self.soft_inpainting is None: - return nmask - - return torch.pow(nmask, (_sigma ** self.soft_inpainting.mask_blend_power) * self.soft_inpainting.mask_blend_scale) if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -184,9 +114,12 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: if self.soft_inpainting is None: - x = latent_blend(self.init_latent, x, self.nmask, self.mask) + x = self.init_latent * self.mask + self.nmask * x else: - x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) + x = si.latent_blend(self.soft_inpainting, + self.init_latent, + x, + si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma)) batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] @@ -290,9 +223,12 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: if self.soft_inpainting is None: - denoised = latent_blend(self.init_latent, denoised, self.nmask, self.mask) + denoised = self.init_latent * self.mask + self.nmask * denoised else: - denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) + denoised = si.latent_blend(self.soft_inpainting, + self.init_latent, + denoised, + si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma)) self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma) diff --git a/modules/soft_inpainting.py b/modules/soft_inpainting.py index 259c36ec..b81c8dd9 100644 --- a/modules/soft_inpainting.py +++ b/modules/soft_inpainting.py @@ -4,13 +4,6 @@ class SoftInpaintingSettings: 
self.mask_blend_scale = mask_blend_scale self.inpaint_detail_preservation = inpaint_detail_preservation - def get_paste_fields(self): - return [ - (self.mask_blend_power, gen_param_labels.mask_blend_power), - (self.mask_blend_scale, gen_param_labels.mask_blend_scale), - (self.inpaint_detail_preservation, gen_param_labels.inpaint_detail_preservation), - ] - def add_generation_params(self, dest): dest[enabled_gen_param_label] = True dest[gen_param_labels.mask_blend_power] = self.mask_blend_power @@ -18,25 +11,169 @@ class SoftInpaintingSettings: dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation +# ------------------- Methods ------------------- + + +def latent_blend(soft_inpainting, a, b, t): + """ + Interpolates two latent image representations according to the parameter t, + where the interpolated vectors' magnitudes are also interpolated separately. + The "detail_preservation" factor biases the magnitude interpolation towards + the larger of the two magnitudes. + """ + import torch + + # NOTE: We use inplace operations wherever possible. + + one_minus_t = 1 - t + + # Linearly interpolate the image vectors. + a_scaled = a * one_minus_t + b_scaled = b * t + image_interp = a_scaled + image_interp.add_(b_scaled) + result_type = image_interp.dtype + del a_scaled, b_scaled + + # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) + # 64-bit operations are used here to allow large exponents. + current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) + + # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). 
+ a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * one_minus_t + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * t + desired_magnitude = a_magnitude + desired_magnitude.add_(b_magnitude).pow_(1 / soft_inpainting.inpaint_detail_preservation) + del a_magnitude, b_magnitude, one_minus_t + + # Change the linearly interpolated image vectors' magnitudes to the value we want. + # This is the last 64-bit operation. + image_interp_scaling_factor = desired_magnitude + image_interp_scaling_factor.div_(current_magnitude) + image_interp_scaling_factor = image_interp_scaling_factor.to(result_type) + image_interp_scaled = image_interp + image_interp_scaled.mul_(image_interp_scaling_factor) + del current_magnitude + del desired_magnitude + del image_interp + del image_interp_scaling_factor + del result_type + + return image_interp_scaled + + +def get_modified_nmask(soft_inpainting, nmask, sigma): + """ + Converts a negative mask representing the transparency of the original latent vectors being overlayed + to a mask that is scaled according to the denoising strength for this step. + + Where: + 0 = fully opaque, infinite density, fully masked + 1 = fully transparent, zero density, fully unmasked + + We bring this transparency to a power, as this allows one to simulate N number of blending operations + where N can be any positive real value. Using this one can control the balance of influence between + the denoiser and the original latents according to the sigma value. 
+ + NOTE: "mask" is not used + """ + import torch + return torch.pow(nmask, (sigma ** soft_inpainting.mask_blend_power) * soft_inpainting.mask_blend_scale) + + +def generate_adaptive_masks( + latent_orig, + latent_processed, + overlay_images, + masks_for_overlay, + width, height, + paste_to): + import torch + import numpy as np + import modules.processing as proc + import modules.images as images + from PIL import Image, ImageOps, ImageFilter + + # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control. + # latent_mask = p.nmask[0].float().cpu() + # convert the original mask into a form we use to scale distances for thresholding + # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2)) + # mask_scalar = mask_scalar / (1.00001-mask_scalar) + # mask_scalar = mask_scalar.numpy() + + latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1) + + kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2) + + for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)): + converted_mask = distance_map.float().cpu().numpy() + converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.9, percentile_max=1, min_width=1) + converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.25, percentile_max=0.75, min_width=1) + + # The distance at which opacity of original decreases to 50% + # half_weighted_distance = 1 # * mask_scalar + # converted_mask = converted_mask / half_weighted_distance + + converted_mask = 1 / (1 + converted_mask ** 2) + converted_mask = images.smootherstep(converted_mask) + converted_mask = 1 - converted_mask + converted_mask = 255. 
* converted_mask + converted_mask = converted_mask.astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, width, height) + converted_mask = proc.create_binary_mask(converted_mask, round=False) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. + if paste_to is not None: + converted_mask = proc. uncrop(converted_mask, + (overlay_image.width, overlay_image.height), + paste_to) + + masks_for_overlay[i] = converted_mask + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + overlay_images[i] = image_masked.convert('RGBA') + + +# ------------------- Constants ------------------- + + +default = SoftInpaintingSettings(1, 0.5, 4) + enabled_ui_label = "Soft inpainting" enabled_gen_param_label = "Soft inpainting enabled" enabled_el_id = "soft_inpainting_enabled" -default = SoftInpaintingSettings(1, 0.5, 4) -ui_labels = SoftInpaintingSettings("Schedule bias", "Preservation strength", "Transition contrast boost") +ui_labels = SoftInpaintingSettings( + "Schedule bias", + "Preservation strength", + "Transition contrast boost") ui_info = SoftInpaintingSettings( - mask_blend_power="Shifts when preservation of original content occurs during denoising.", - # "Below 1: Stronger preservation near the end (with low sigma)\n" - # "1: Balanced (proportional to sigma)\n" - # "Above 1: Stronger preservation in the beginning (with high sigma)", - mask_blend_scale="How strongly partially masked content should be preserved.", - # "Low values: Favors generated content.\n" - # "High values: Favors original content.", - inpaint_detail_preservation="Amplifies the contrast that may be lost in partially masked regions.") - -gen_param_labels = 
SoftInpaintingSettings("Soft inpainting schedule bias", "Soft inpainting preservation strength", "Soft inpainting transition contrast boost") -el_ids = SoftInpaintingSettings("mask_blend_power", "mask_blend_scale", "inpaint_detail_preservation") + "Shifts when preservation of original content occurs during denoising.", + "How strongly partially masked content should be preserved.", + "Amplifies the contrast that may be lost in partially masked regions.") + +gen_param_labels = SoftInpaintingSettings( + "Soft inpainting schedule bias", + "Soft inpainting preservation strength", + "Soft inpainting transition contrast boost") + +el_ids = SoftInpaintingSettings( + "mask_blend_power", + "mask_blend_scale", + "inpaint_detail_preservation") + + +# ------------------- UI ------------------- def gradio_ui(): diff --git a/modules/ui.py b/modules/ui.py index 0e4fb17a..4f1265a3 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -683,13 +683,6 @@ def create_ui(): with FormRow(): soft_inpainting = si.gradio_ui() - - """ - mask_blend_power = gr.Slider(label='Blending bias', minimum=0, maximum=8, step=0.1, value=1, elem_id="img2img_mask_blend_power") - mask_blend_scale = gr.Slider(label='Blending preservation', minimum=0, maximum=8, step=0.05, value=0.5, elem_id="img2img_mask_blend_scale") - inpaint_detail_preservation = gr.Slider(label='Blending contrast boost', minimum=1, maximum=32, step=0.5, value=4, elem_id="img2img_mask_blend_offset") - """ - with FormRow(): inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode") -- cgit v1.2.3 From 57f29bd61dc30f1a8c94ead9b780f4655f7d7d6d Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 17:41:18 -0700 Subject: Re-introduce latent blending step from the vanilla inpainting procedure. 
--- modules/processing.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 0b360387..c8dc4d93 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1597,6 +1597,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) + if self.mask is not None and self.soft_inpainting is None: + samples = samples * self.nmask + self.init_latent * self.mask + del x devices.torch_gc() -- cgit v1.2.3 From 60c602232fd760fb548fb0b3d18b5297f8823c2a Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 17:41:51 -0700 Subject: Restored original formatting. --- modules/processing.py | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index c8dc4d93..90ae249a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -370,10 +370,7 @@ class StableDiffusionProcessing: return self.edit_image_conditioning(source_image) if self.sampler.conditioning_key in {'hybrid', 'concat'}: - return self.inpainting_image_conditioning(source_image, - latent_image, - image_mask=image_mask, - round_image_mask=round_image_mask) + return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask, round_image_mask=round_image_mask) if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) @@ -885,7 +882,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim - # todo: generate masks the old fashioned way + # todo: generate adaptive masks based on pixel differences. 
+ # if p.masks_for_overlay is used, it will already be populated with masks else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method @@ -900,9 +898,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: height=p.height, paste_to=p.paste_to) - x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, - target_device=devices.cpu, - check_for_nans=True) + x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) @@ -927,9 +923,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: x_samples_ddim = batch_params.images def infotext(index=0, use_main_prompt=False): - return create_infotext(p, p.prompts, p.seeds, p.subseeds, - use_main_prompt=use_main_prompt, index=index, - all_negative_prompts=p.negative_prompts) + return create_infotext(p, p.prompts, p.seeds, p.subseeds, use_main_prompt=use_main_prompt, index=index, all_negative_prompts=p.negative_prompts) save_samples = p.save_samples() @@ -972,8 +966,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: image = apply_overlay(image, p.paste_to, i, p.overlay_images) if save_samples: - images.save_image(image, p.outpath_samples, "", p.seeds[i], - p.prompts[i], opts.samples_format, info=infotext(i), p=p) + images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p) text = infotext(i) infotexts.append(text) @@ -983,14 +976,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if save_samples and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): if hasattr(p, 'masks_for_overlay') and p.masks_for_overlay: image_mask = p.masks_for_overlay[i].convert('RGB') - image_mask_composite = Image.composite( - 
original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), - images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA') + image_mask_composite = Image.composite(original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.masks_for_overlay[i], image.width, image.height).convert('L')).convert('RGBA') elif hasattr(p, 'mask_for_overlay') and p.mask_for_overlay: image_mask = p.mask_for_overlay.convert('RGB') - image_mask_composite = Image.composite( - original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), - images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') + image_mask_composite = Image.composite(original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') else: image_mask = None image_mask_composite = None @@ -1515,8 +1504,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.masks_for_overlay.append(image_mask) else: image_masked = Image.new('RGBa', (image.width, image.height)) - image_masked.paste(image.convert("RGBA").convert("RGBa"), - mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + self.overlay_images.append(image_masked.convert('RGBA')) # crop_region is not None if we are doing inpaint full res @@ -1583,10 +1572,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask - self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, - self.init_latent, - image_mask, - self.soft_inpainting is None) + self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, 
self.init_latent, image_mask, self.soft_inpainting is None) def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): x = self.rng.next() -- cgit v1.2.3 From b32a334e3da7b06d82441beaa08a673b4f55bca1 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 17:57:10 -0700 Subject: Applies a convert('RGBA') operation early to mimic previous behaviour. --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 90ae249a..7fc282cf 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1500,7 +1500,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if image_mask is not None: if self.soft_inpainting is not None: # We apply the masks AFTER to adjust mask based on changed content. - self.overlay_images.append(image) + self.overlay_images.append(image.convert('RGBA')) self.masks_for_overlay.append(image_mask) else: image_masked = Image.new('RGBa', (image.width, image.height)) -- cgit v1.2.3 From 6fc12428e3c5f903584ca7986e0c441f80fa2807 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 19:42:59 -0700 Subject: Fixed issue where batched inpainting (batch size > 1) wouldn't work because of mismatched tensor sizes. The 'already_decoded' decoded case should also be handled correctly (tested indirectly). 
--- modules/processing.py | 23 ++++++++++------ modules/soft_inpainting.py | 66 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 71 insertions(+), 18 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 7fc282cf..71bb056a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -883,20 +883,27 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim # todo: generate adaptive masks based on pixel differences. - # if p.masks_for_overlay is used, it will already be populated with masks + if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None: + si.apply_masks(soft_inpainting=p.soft_inpainting, + nmask=p.nmask, + overlay_images=p.overlay_images, + masks_for_overlay=p.masks_for_overlay, + width=p.width, + height=p.height, + paste_to=p.paste_to) else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method # Generate the mask(s) based on similarity between the original and denoised latent vectors if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None: - si.generate_adaptive_masks(latent_orig=p.init_latent, - latent_processed=samples_ddim, - overlay_images=p.overlay_images, - masks_for_overlay=p.masks_for_overlay, - width=p.width, - height=p.height, - paste_to=p.paste_to) + si.apply_adaptive_masks(latent_orig=p.init_latent, + latent_processed=samples_ddim, + overlay_images=p.overlay_images, + masks_for_overlay=p.masks_for_overlay, + width=p.width, + height=p.height, + paste_to=p.paste_to) x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) diff --git a/modules/soft_inpainting.py b/modules/soft_inpainting.py index 56a87774..b36ac8fa 100644 --- a/modules/soft_inpainting.py +++ 
b/modules/soft_inpainting.py @@ -25,26 +25,32 @@ def latent_blend(soft_inpainting, a, b, t): # NOTE: We use inplace operations wherever possible. - one_minus_t = 1 - t + # [4][w][h] to [1][4][w][h] + t2 = t.unsqueeze(0) + # [4][w][h] to [1][1][w][h] - the [4] seem redundant. + t3 = t[0].unsqueeze(0).unsqueeze(0) + + one_minus_t2 = 1 - t2 + one_minus_t3 = 1 - t3 # Linearly interpolate the image vectors. - a_scaled = a * one_minus_t - b_scaled = b * t + a_scaled = a * one_minus_t2 + b_scaled = b * t2 image_interp = a_scaled image_interp.add_(b_scaled) result_type = image_interp.dtype - del a_scaled, b_scaled + del a_scaled, b_scaled, t2, one_minus_t2 # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) # 64-bit operations are used here to allow large exponents. - current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) + current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001) # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * one_minus_t - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * t + a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * one_minus_t3 + b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * t3 desired_magnitude = a_magnitude desired_magnitude.add_(b_magnitude).pow_(1 / soft_inpainting.inpaint_detail_preservation) - del a_magnitude, b_magnitude, one_minus_t + del a_magnitude, b_magnitude, t3, one_minus_t3 # Change the linearly interpolated image vectors' magnitudes to the value we want. # This is the last 64-bit operation. 
@@ -78,10 +84,11 @@ def get_modified_nmask(soft_inpainting, nmask, sigma): NOTE: "mask" is not used """ import torch - return torch.pow(nmask, (sigma ** soft_inpainting.mask_blend_power) * soft_inpainting.mask_blend_scale) + # todo: Why is sigma 2D? Both values are the same. + return torch.pow(nmask, (sigma[0] ** soft_inpainting.mask_blend_power) * soft_inpainting.mask_blend_scale) -def generate_adaptive_masks( +def apply_adaptive_masks( latent_orig, latent_processed, overlay_images, @@ -142,6 +149,45 @@ def generate_adaptive_masks( overlay_images[i] = image_masked.convert('RGBA') +def apply_masks( + soft_inpainting, + nmask, + overlay_images, + masks_for_overlay, + width, height, + paste_to): + import torch + import numpy as np + import modules.processing as proc + import modules.images as images + from PIL import Image, ImageOps, ImageFilter + + converted_mask = nmask[0].float() + converted_mask = torch.clamp(converted_mask, min=0, max=1).pow_(soft_inpainting.mask_blend_scale / 2) + converted_mask = 255. * converted_mask + converted_mask = converted_mask.cpu().numpy().astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, width, height) + converted_mask = proc.create_binary_mask(converted_mask, round=False) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. 
+ if paste_to is not None: + converted_mask = proc.uncrop(converted_mask, + (width, height), + paste_to) + + for i, overlay_image in enumerate(overlay_images): + masks_for_overlay[i] = converted_mask + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + overlay_images[i] = image_masked.convert('RGBA') + # ------------------- Constants ------------------- -- cgit v1.2.3 From 49bbf1140731036875573bb7c44aa7e74623c856 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 19:47:40 -0700 Subject: Fixed unused import. --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 71bb056a..e1823ac3 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field import torch import numpy as np -from PIL import Image, ImageOps, ImageFilter +from PIL import Image, ImageOps import random import cv2 from skimage import exposure -- cgit v1.2.3 From ac4578912395627731f2cd8529f87a95df1f7644 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Wed, 6 Dec 2023 21:16:27 -0700 Subject: Removed soft inpainting, added hooks for softpainting to work instead. 
--- modules/processing.py | 94 +++++++++++++++---------------------- modules/scripts.py | 70 +++++++++++++++++++++++++++ modules/sd_samplers_cfg_denoiser.py | 23 ++++----- 3 files changed, 118 insertions(+), 69 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 7d46949f..5a1a90af 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -30,7 +30,6 @@ import modules.sd_models as sd_models import modules.sd_vae as sd_vae from ldm.data.util import AddMiDaS from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion -import modules.soft_inpainting as si from einops import repeat, rearrange from blendmodes.blend import blendLayers, BlendType @@ -73,12 +72,10 @@ def uncrop(image, dest_size, paste_loc): return image -def apply_overlay(image, paste_loc, index, overlays): - if overlays is None or index >= len(overlays): +def apply_overlay(image, paste_loc, overlay): + if overlay is None: return image - overlay = overlays[index] - if paste_loc is not None: image = uncrop(image, (overlay.width, overlay.height), paste_loc) @@ -150,7 +147,6 @@ class StableDiffusionProcessing: do_not_save_grid: bool = False extra_generation_params: dict[str, Any] = None overlay_images: list = None - masks_for_overlay: list = None eta: float = None do_not_reload_embeddings: bool = False denoising_strength: float = None @@ -880,31 +876,17 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast(): samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts) + if p.scripts is not None: + ps = scripts.PostSampleArgs(samples_ddim) + p.scripts.post_sample(p, ps) + samples_ddim = pp.samples + if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim - # todo: generate adaptive masks based on pixel 
differences. - if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None: - si.apply_masks(soft_inpainting=p.soft_inpainting, - nmask=p.nmask, - overlay_images=p.overlay_images, - masks_for_overlay=p.masks_for_overlay, - width=p.width, - height=p.height, - paste_to=p.paste_to) else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - # Generate the mask(s) based on similarity between the original and denoised latent vectors - if getattr(p, "image_mask", None) is not None and getattr(p, "soft_inpainting", None) is not None: - si.apply_adaptive_masks(latent_orig=p.init_latent, - latent_processed=samples_ddim, - overlay_images=p.overlay_images, - masks_for_overlay=p.masks_for_overlay, - width=p.width, - height=p.height, - paste_to=p.paste_to) - x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() @@ -955,9 +937,18 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: pp = scripts.PostprocessImageArgs(image) p.scripts.postprocess_image(p, pp) image = pp.image + + mask_for_overlay = p.mask_for_overlay + overlay_image = p.overlay_images[i] if p.overlay_images is not None and i < len(p.overlay_images) else None + + if p.scripts is not None: + ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image) + p.scripts.postprocess_maskoverlay(p, ppmo) + mask_for_overlay, overlay_image = pp.mask_for_overlay, pp.overlay_image + if p.color_corrections is not None and i < len(p.color_corrections): if save_samples and opts.save_images_before_color_correction: - image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images) + image_without_cc = apply_overlay(image, p.paste_to, overlay_image) images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, 
suffix="-before-color-correction") image = apply_color_correction(p.color_corrections[i], image) @@ -968,9 +959,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: original_denoised_image = image.copy() if p.paste_to is not None: - original_denoised_image = uncrop(original_denoised_image, (p.overlay_images[i].width, p.overlay_images[i].height), p.paste_to) + original_denoised_image = uncrop(original_denoised_image, (p.overlay_image.width, p.overlay_image.height), p.paste_to) - image = apply_overlay(image, p.paste_to, i, p.overlay_images) + image = apply_overlay(image, p.paste_to, overlay_image) if save_samples: images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p) @@ -981,13 +972,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: image.info["parameters"] = text output_images.append(image) - if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay: - mask_for_overlay = p.mask_for_overlay - elif hasattr(p, 'masks_for_overlay') and p.masks_for_overlay and p.masks_for_overlay[i]: - mask_for_overlay = p.masks_for_overlay[i] - else: - mask_for_overlay = None - if mask_for_overlay is not None: if opts.return_mask or opts.save_mask: image_mask = mask_for_overlay.convert('RGB') @@ -1401,7 +1385,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask_blur_x: int = 4 mask_blur_y: int = 4 mask_blur: int = None - soft_inpainting: si.SoftInpaintingParameters = si.default + mask_round: bool = True inpainting_fill: int = 0 inpaint_full_res: bool = True inpaint_full_res_padding: int = 0 @@ -1447,7 +1431,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if image_mask is not None: # image_mask is passed in as RGBA by Gradio to support alpha masks, # but we still want to support binary masks. 
- image_mask = create_binary_mask(image_mask, round=(self.soft_inpainting is None)) + image_mask = create_binary_mask(image_mask, round=self.mask_round) if self.inpainting_mask_invert: image_mask = ImageOps.invert(image_mask) @@ -1465,7 +1449,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image_mask = Image.fromarray(np_mask) if self.inpaint_full_res: - self.mask_for_overlay = image_mask if self.soft_inpainting is None else None + self.mask_for_overlay = image_mask mask = image_mask.convert('L') crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding) crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) @@ -1476,13 +1460,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.paste_to = (x1, y1, x2-x1, y2-y1) else: image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height) + np_mask = np.array(image_mask) + np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8) + self.mask_for_overlay = Image.fromarray(np_mask) - if self.soft_inpainting is None: - np_mask = np.array(image_mask) - np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8) - self.mask_for_overlay = Image.fromarray(np_mask) - - self.masks_for_overlay = [] if self.soft_inpainting is not None else None self.overlay_images = [] latent_mask = self.latent_mask if self.latent_mask is not None else image_mask @@ -1504,15 +1485,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): image = images.resize_image(self.resize_mode, image, self.width, self.height) if image_mask is not None: - if self.soft_inpainting is not None: - # We apply the masks AFTER to adjust mask based on changed content. 
- self.overlay_images.append(image.convert('RGBA')) - self.masks_for_overlay.append(image_mask) - else: - image_masked = Image.new('RGBa', (image.width, image.height)) - image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + image_masked = Image.new('RGBa', (image.width, image.height)) + image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) - self.overlay_images.append(image_masked.convert('RGBA')) + self.overlay_images.append(image_masked.convert('RGBA')) # crop_region is not None if we are doing inpaint full res if crop_region is not None: @@ -1565,7 +1541,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 latmask = latmask[0] - if self.soft_inpainting is None: + if self.mask_round: latmask = np.around(latmask) latmask = np.tile(latmask[None], (4, 1, 1)) @@ -1578,7 +1554,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask - self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.soft_inpainting is None) + self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.mask_round) def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): x = self.rng.next() @@ -1589,8 +1565,14 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) - if self.mask is not None and self.soft_inpainting is None: - samples = samples * self.nmask + self.init_latent * self.mask + 
blended_samples = samples * self.nmask + self.init_latent * self.mask + + if self.scripts is not None: + mba = scripts.MaskBlendArgs(self, samples, self.nmask, self.init_latent, self.mask, blended_samples, sigma=None, is_final_blend=True) + self.scripts.on_mask_blend(self, mba) + blended_samples = mba.blended_latent + + samples = blended_samples del x devices.torch_gc() diff --git a/modules/scripts.py b/modules/scripts.py index 7f9454eb..92a07c56 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -11,11 +11,31 @@ from modules import shared, paths, script_callbacks, extensions, script_loading, AlwaysVisible = object() +class MaskBlendArgs: + def __init__(self, current_latent, nmask, init_latent, mask, blended_samples, denoiser=None, sigma=None): + self.current_latent = current_latent + self.nmask = nmask + self.init_latent = init_latent + self.mask = mask + self.blended_samples = blended_samples + + self.denoiser = denoiser + self.is_final_blend = denoiser is None + self.sigma = sigma + +class PostSampleArgs: + def __init__(self, samples): + self.samples = samples class PostprocessImageArgs: def __init__(self, image): self.image = image +class PostProcessMaskOverlayArgs: + def __init__(self, index, mask_for_overlay, overlay_image): + self.index = index + self.mask_for_overlay = mask_for_overlay + self.overlay_image = overlay_image class PostprocessBatchListArgs: def __init__(self, images): @@ -206,6 +226,25 @@ class Script: pass + def on_mask_blend(self, p, mba: MaskBlendArgs, *args): + """ + Called in inpainting mode when the original content is blended with the inpainted content. + This is called at every step in the denoising process and once at the end. + If is_final_blend is true, this is called for the final blending stage. + Otherwise, denoiser and sigma are defined and may be used to inform the procedure. 
+ """ + + pass + + def post_sample(self, p, ps: PostSampleArgs, *args): + """ + Called after the samples have been generated, + but before they have been decoded by the VAE, if applicable. + Check getattr(samples, 'already_decoded', False) to test if the images are decoded. + """ + + pass + def postprocess_image(self, p, pp: PostprocessImageArgs, *args): """ Called for every image after it has been generated. @@ -213,6 +252,13 @@ class Script: pass + def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs, *args): + """ + Called for every image after it has been generated. + """ + + pass + def postprocess(self, p, processed, *args): """ This function is called after processing ends for AlwaysVisible scripts. @@ -767,6 +813,22 @@ class ScriptRunner: except Exception: errors.report(f"Error running postprocess_batch_list: {script.filename}", exc_info=True) + def post_sample(self, p, ps: PostSampleArgs): + for script in self.alwayson_scripts: + try: + script_args = p.script_args[script.args_from:script.args_to] + script.post_sample(p, ps, *script_args) + except Exception: + errors.report(f"Error running post_sample: {script.filename}", exc_info=True) + + def on_mask_blend(self, p, mba: MaskBlendArgs): + for script in self.alwayson_scripts: + try: + script_args = p.script_args[script.args_from:script.args_to] + script.on_mask_blend(p, mba, *script_args) + except Exception: + errors.report(f"Error running post_sample: {script.filename}", exc_info=True) + def postprocess_image(self, p, pp: PostprocessImageArgs): for script in self.alwayson_scripts: try: @@ -775,6 +837,14 @@ class ScriptRunner: except Exception: errors.report(f"Error running postprocess_image: {script.filename}", exc_info=True) + def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs): + for script in self.alwayson_scripts: + try: + script_args = p.script_args[script.args_from:script.args_to] + script.postprocess_maskoverlay(p, ppmo, *script_args) + except Exception: + 
errors.report(f"Error running postprocess_image: {script.filename}", exc_info=True) + def before_component(self, component, **kwargs): for callback, script in self.on_before_component_elem_id.get(kwargs.get("elem_id"), []): try: diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index f13e8dcc..eb9d5daf 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -109,19 +109,16 @@ class CFGDenoiser(torch.nn.Module): assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)" # If we use masks, blending between the denoised and original latent images occurs here. - def apply_blend(latent): - if hasattr(self.p, "denoiser_masked_blend_function") and callable(self.p.denoiser_masked_blend_function): - return self.p.denoiser_masked_blend_function( - self, - # Using an argument dictionary so that arguments can be added without breaking extensions. - args= - { - "denoiser": self, - "current_latent": latent, - "sigma": sigma - }) - else: - return self.init_latent * self.mask + self.nmask * latent + def apply_blend(current_latent): + blended_latent = current_latent * self.nmask + self.init_latent * self.mask + + if self.p.scripts is not None: + from modules import scripts + mba = scripts.MaskBlendArgs(current_latent, self.nmask, self.init_latent, self.mask, blended_latent, denoiser=self, sigma=sigma) + self.p.scripts.on_mask_blend(self.p, mba) + blended_latent = mba.blended_latent + + return blended_latent # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: -- cgit v1.2.3 From 2abc417834d752e43a283f8603bfddfb1c80b30f Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Wed, 6 Dec 2023 22:25:53 -0700 Subject: Re-implemented soft inpainting via a script. 
Also fixed some mistakes with the previous hooks, removed unnecessary formatting changes, removed code that I had forgotten to. --- modules/processing.py | 23 ++- modules/scripts.py | 4 +- modules/soft_inpainting.py | 308 ---------------------------------- scripts/soft_inpainting.py | 401 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 413 insertions(+), 323 deletions(-) delete mode 100644 modules/soft_inpainting.py create mode 100644 scripts/soft_inpainting.py (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 5a1a90af..f8d85bdf 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -879,14 +879,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None: ps = scripts.PostSampleArgs(samples_ddim) p.scripts.post_sample(p, ps) - samples_ddim = pp.samples + samples_ddim = ps.samples if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim else: if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method - x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) x_samples_ddim = torch.stack(x_samples_ddim).float() @@ -944,7 +943,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None: ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image) p.scripts.postprocess_maskoverlay(p, ppmo) - mask_for_overlay, overlay_image = pp.mask_for_overlay, pp.overlay_image + mask_for_overlay, overlay_image = ppmo.mask_for_overlay, ppmo.overlay_image if p.color_corrections is not None and i < len(p.color_corrections): if save_samples and opts.save_images_before_color_correction: @@ -959,7 +958,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: original_denoised_image = image.copy() if p.paste_to is not None: - original_denoised_image = 
uncrop(original_denoised_image, (p.overlay_image.width, p.overlay_image.height), p.paste_to) + original_denoised_image = uncrop(original_denoised_image, (overlay_image.width, overlay_image.height), p.paste_to) image = apply_overlay(image, p.paste_to, overlay_image) @@ -1512,9 +1511,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.overlay_images is not None: self.overlay_images = self.overlay_images * self.batch_size - if self.masks_for_overlay is not None: - self.masks_for_overlay = self.masks_for_overlay * self.batch_size - if self.color_corrections is not None and len(self.color_corrections) == 1: self.color_corrections = self.color_corrections * self.batch_size @@ -1565,14 +1561,15 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) - blended_samples = samples * self.nmask + self.init_latent * self.mask + if self.mask is not None: + blended_samples = samples * self.nmask + self.init_latent * self.mask - if self.scripts is not None: - mba = scripts.MaskBlendArgs(self, samples, self.nmask, self.init_latent, self.mask, blended_samples, sigma=None, is_final_blend=True) - self.scripts.on_mask_blend(self, mba) - blended_samples = mba.blended_latent + if self.scripts is not None: + mba = scripts.MaskBlendArgs(samples, self.nmask, self.init_latent, self.mask, blended_samples) + self.scripts.on_mask_blend(self, mba) + blended_samples = mba.blended_latent - samples = blended_samples + samples = blended_samples del x devices.torch_gc() diff --git a/modules/scripts.py b/modules/scripts.py index 92a07c56..b6fcf96e 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -12,12 +12,12 @@ from modules import shared, paths, script_callbacks, extensions, script_loading, AlwaysVisible = object() class MaskBlendArgs: - def __init__(self, current_latent, nmask, init_latent, mask, 
blended_samples, denoiser=None, sigma=None): + def __init__(self, current_latent, nmask, init_latent, mask, blended_latent, denoiser=None, sigma=None): self.current_latent = current_latent self.nmask = nmask self.init_latent = init_latent self.mask = mask - self.blended_samples = blended_samples + self.blended_latent = blended_latent self.denoiser = denoiser self.is_final_blend = denoiser is None diff --git a/modules/soft_inpainting.py b/modules/soft_inpainting.py deleted file mode 100644 index b36ac8fa..00000000 --- a/modules/soft_inpainting.py +++ /dev/null @@ -1,308 +0,0 @@ -class SoftInpaintingSettings: - def __init__(self, mask_blend_power, mask_blend_scale, inpaint_detail_preservation): - self.mask_blend_power = mask_blend_power - self.mask_blend_scale = mask_blend_scale - self.inpaint_detail_preservation = inpaint_detail_preservation - - def add_generation_params(self, dest): - dest[enabled_gen_param_label] = True - dest[gen_param_labels.mask_blend_power] = self.mask_blend_power - dest[gen_param_labels.mask_blend_scale] = self.mask_blend_scale - dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation - - -# ------------------- Methods ------------------- - - -def latent_blend(soft_inpainting, a, b, t): - """ - Interpolates two latent image representations according to the parameter t, - where the interpolated vectors' magnitudes are also interpolated separately. - The "detail_preservation" factor biases the magnitude interpolation towards - the larger of the two magnitudes. - """ - import torch - - # NOTE: We use inplace operations wherever possible. - - # [4][w][h] to [1][4][w][h] - t2 = t.unsqueeze(0) - # [4][w][h] to [1][1][w][h] - the [4] seem redundant. - t3 = t[0].unsqueeze(0).unsqueeze(0) - - one_minus_t2 = 1 - t2 - one_minus_t3 = 1 - t3 - - # Linearly interpolate the image vectors. 
- a_scaled = a * one_minus_t2 - b_scaled = b * t2 - image_interp = a_scaled - image_interp.add_(b_scaled) - result_type = image_interp.dtype - del a_scaled, b_scaled, t2, one_minus_t2 - - # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) - # 64-bit operations are used here to allow large exponents. - current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001) - - # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * one_minus_t3 - b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_(soft_inpainting.inpaint_detail_preservation) * t3 - desired_magnitude = a_magnitude - desired_magnitude.add_(b_magnitude).pow_(1 / soft_inpainting.inpaint_detail_preservation) - del a_magnitude, b_magnitude, t3, one_minus_t3 - - # Change the linearly interpolated image vectors' magnitudes to the value we want. - # This is the last 64-bit operation. - image_interp_scaling_factor = desired_magnitude - image_interp_scaling_factor.div_(current_magnitude) - image_interp_scaling_factor = image_interp_scaling_factor.to(result_type) - image_interp_scaled = image_interp - image_interp_scaled.mul_(image_interp_scaling_factor) - del current_magnitude - del desired_magnitude - del image_interp - del image_interp_scaling_factor - del result_type - - return image_interp_scaled - - -def get_modified_nmask(soft_inpainting, nmask, sigma): - """ - Converts a negative mask representing the transparency of the original latent vectors being overlayed - to a mask that is scaled according to the denoising strength for this step. 
- - Where: - 0 = fully opaque, infinite density, fully masked - 1 = fully transparent, zero density, fully unmasked - - We bring this transparency to a power, as this allows one to simulate N number of blending operations - where N can be any positive real value. Using this one can control the balance of influence between - the denoiser and the original latents according to the sigma value. - - NOTE: "mask" is not used - """ - import torch - # todo: Why is sigma 2D? Both values are the same. - return torch.pow(nmask, (sigma[0] ** soft_inpainting.mask_blend_power) * soft_inpainting.mask_blend_scale) - - -def apply_adaptive_masks( - latent_orig, - latent_processed, - overlay_images, - masks_for_overlay, - width, height, - paste_to): - import torch - import numpy as np - import modules.processing as proc - import modules.images as images - from PIL import Image, ImageOps, ImageFilter - - # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control. - # latent_mask = p.nmask[0].float().cpu() - # convert the original mask into a form we use to scale distances for thresholding - # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2)) - # mask_scalar = mask_scalar / (1.00001-mask_scalar) - # mask_scalar = mask_scalar.numpy() - - latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1) - - kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2) - - for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)): - converted_mask = distance_map.float().cpu().numpy() - converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, - percentile_min=0.9, percentile_max=1, min_width=1) - converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center, - percentile_min=0.25, percentile_max=0.75, min_width=1) - - # The distance at which opacity of original decreases to 50% - # 
half_weighted_distance = 1 # * mask_scalar - # converted_mask = converted_mask / half_weighted_distance - - converted_mask = 1 / (1 + converted_mask ** 2) - converted_mask = images.smootherstep(converted_mask) - converted_mask = 1 - converted_mask - converted_mask = 255. * converted_mask - converted_mask = converted_mask.astype(np.uint8) - converted_mask = Image.fromarray(converted_mask) - converted_mask = images.resize_image(2, converted_mask, width, height) - converted_mask = proc.create_binary_mask(converted_mask, round=False) - - # Remove aliasing artifacts using a gaussian blur. - converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) - - # Expand the mask to fit the whole image if needed. - if paste_to is not None: - converted_mask = proc. uncrop(converted_mask, - (overlay_image.width, overlay_image.height), - paste_to) - - masks_for_overlay[i] = converted_mask - - image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) - image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), - mask=ImageOps.invert(converted_mask.convert('L'))) - - overlay_images[i] = image_masked.convert('RGBA') - -def apply_masks( - soft_inpainting, - nmask, - overlay_images, - masks_for_overlay, - width, height, - paste_to): - import torch - import numpy as np - import modules.processing as proc - import modules.images as images - from PIL import Image, ImageOps, ImageFilter - - converted_mask = nmask[0].float() - converted_mask = torch.clamp(converted_mask, min=0, max=1).pow_(soft_inpainting.mask_blend_scale / 2) - converted_mask = 255. * converted_mask - converted_mask = converted_mask.cpu().numpy().astype(np.uint8) - converted_mask = Image.fromarray(converted_mask) - converted_mask = images.resize_image(2, converted_mask, width, height) - converted_mask = proc.create_binary_mask(converted_mask, round=False) - - # Remove aliasing artifacts using a gaussian blur. 
- converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) - - # Expand the mask to fit the whole image if needed. - if paste_to is not None: - converted_mask = proc.uncrop(converted_mask, - (width, height), - paste_to) - - for i, overlay_image in enumerate(overlay_images): - masks_for_overlay[i] = converted_mask - - image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) - image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), - mask=ImageOps.invert(converted_mask.convert('L'))) - - overlay_images[i] = image_masked.convert('RGBA') - - -# ------------------- Constants ------------------- - - -default = SoftInpaintingSettings(1, 0.5, 4) - -enabled_ui_label = "Soft inpainting" -enabled_gen_param_label = "Soft inpainting enabled" -enabled_el_id = "soft_inpainting_enabled" - -ui_labels = SoftInpaintingSettings( - "Schedule bias", - "Preservation strength", - "Transition contrast boost") - -ui_info = SoftInpaintingSettings( - "Shifts when preservation of original content occurs during denoising.", - "How strongly partially masked content should be preserved.", - "Amplifies the contrast that may be lost in partially masked regions.") - -gen_param_labels = SoftInpaintingSettings( - "Soft inpainting schedule bias", - "Soft inpainting preservation strength", - "Soft inpainting transition contrast boost") - -el_ids = SoftInpaintingSettings( - "mask_blend_power", - "mask_blend_scale", - "inpaint_detail_preservation") - - -# ------------------- UI ------------------- - - -def gradio_ui(): - import gradio as gr - from modules.ui_components import InputAccordion - - with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled: - with gr.Group(): - gr.Markdown( - """ - Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity. - **High _Mask blur_** values are recommended! 
- """) - - result = SoftInpaintingSettings( - gr.Slider(label=ui_labels.mask_blend_power, - info=ui_info.mask_blend_power, - minimum=0, - maximum=8, - step=0.1, - value=default.mask_blend_power, - elem_id=el_ids.mask_blend_power), - gr.Slider(label=ui_labels.mask_blend_scale, - info=ui_info.mask_blend_scale, - minimum=0, - maximum=8, - step=0.05, - value=default.mask_blend_scale, - elem_id=el_ids.mask_blend_scale), - gr.Slider(label=ui_labels.inpaint_detail_preservation, - info=ui_info.inpaint_detail_preservation, - minimum=1, - maximum=32, - step=0.5, - value=default.inpaint_detail_preservation, - elem_id=el_ids.inpaint_detail_preservation)) - - with gr.Accordion("Help", open=False): - gr.Markdown( - f""" - ### {ui_labels.mask_blend_power} - - The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas). - This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step. - This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation. - - - **Below 1**: Stronger preservation near the end (with low sigma) - - **1**: Balanced (proportional to sigma) - - **Above 1**: Stronger preservation in the beginning (with high sigma) - """) - gr.Markdown( - f""" - ### {ui_labels.mask_blend_scale} - - Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content. - This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength. - - - **Low values**: Favors generated content. - - **High values**: Favors original content. - """) - gr.Markdown( - f""" - ### {ui_labels.inpaint_detail_preservation} - - This parameter controls how the original latent vectors and denoised latent vectors are interpolated. 
import gradio as gr
from modules.ui_components import InputAccordion
import modules.scripts as scripts


class SoftInpaintingSettings:
    """Triple of soft-inpainting values, one per tunable parameter.

    The same container is reused for the numeric settings, the UI labels,
    the UI info texts, the infotext labels and the element ids, so that the
    three attributes always line up by name.
    """

    def __init__(self, mask_blend_power, mask_blend_scale, inpaint_detail_preservation):
        self.mask_blend_power = mask_blend_power
        self.mask_blend_scale = mask_blend_scale
        self.inpaint_detail_preservation = inpaint_detail_preservation

    def add_generation_params(self, dest):
        """Write the enabled flag and the three settings into the mapping ``dest``,
        keyed by the module-level generation-parameter labels."""
        dest[enabled_gen_param_label] = True
        dest[gen_param_labels.mask_blend_power] = self.mask_blend_power
        dest[gen_param_labels.mask_blend_scale] = self.mask_blend_scale
        dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation


# ------------------- Methods -------------------


def latent_blend(soft_inpainting, a, b, t):
    """
    Interpolates two latent image representations according to the parameter t,
    where the interpolated vectors' magnitudes are also interpolated separately.
    The "detail_preservation" factor biases the magnitude interpolation towards
    the larger of the two magnitudes.

    ``soft_inpainting`` supplies ``inpaint_detail_preservation``; ``a`` and ``b``
    are the two latents and ``t`` the per-element blend weight (0 selects ``a``,
    1 selects ``b``). Returns the blended latent in the dtype of the linear blend.
    """
    import torch

    # NOTE: We use inplace operations wherever possible.

    # [4][w][h] to [1][4][w][h]
    t2 = t.unsqueeze(0)
    # [4][w][h] to [1][1][w][h] - the [4] seem redundant.
    t3 = t[0].unsqueeze(0).unsqueeze(0)

    one_minus_t2 = 1 - t2
    one_minus_t3 = 1 - t3

    # Linearly interpolate the image vectors.
    a_scaled = a * one_minus_t2
    b_scaled = b * t2
    image_interp = a_scaled
    image_interp.add_(b_scaled)
    result_type = image_interp.dtype
    del a_scaled, b_scaled, t2, one_minus_t2

    # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.)
    # 64-bit operations are used here to allow large exponents.
    # The small epsilon keeps the later division away from zero.
    current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001)

    # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1).
    a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_(
        soft_inpainting.inpaint_detail_preservation) * one_minus_t3
    b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_(
        soft_inpainting.inpaint_detail_preservation) * t3
    desired_magnitude = a_magnitude
    desired_magnitude.add_(b_magnitude).pow_(1 / soft_inpainting.inpaint_detail_preservation)
    del a_magnitude, b_magnitude, t3, one_minus_t3

    # Change the linearly interpolated image vectors' magnitudes to the value we want.
    # This is the last 64-bit operation.
    image_interp_scaling_factor = desired_magnitude
    image_interp_scaling_factor.div_(current_magnitude)
    image_interp_scaling_factor = image_interp_scaling_factor.to(result_type)
    image_interp_scaled = image_interp
    image_interp_scaled.mul_(image_interp_scaling_factor)
    del current_magnitude
    del desired_magnitude
    del image_interp
    del image_interp_scaling_factor
    del result_type

    return image_interp_scaled


def get_modified_nmask(soft_inpainting, nmask, sigma):
    """
    Converts a negative mask representing the transparency of the original latent vectors being overlayed
    to a mask that is scaled according to the denoising strength for this step.

    Where:
        0 = fully opaque, infinite density, fully masked
        1 = fully transparent, zero density, fully unmasked

    We bring this transparency to a power, as this allows one to simulate N number of blending operations
    where N can be any positive real value. Using this one can control the balance of influence between
    the denoiser and the original latents according to the sigma value.

    The exponent is ``(sigma ** mask_blend_power) * mask_blend_scale``, both taken from ``soft_inpainting``.
    """
    import torch
    return torch.pow(nmask, (sigma ** soft_inpainting.mask_blend_power) * soft_inpainting.mask_blend_scale)


def apply_adaptive_masks(
        latent_orig,
        latent_processed,
        overlay_images,
        width, height,
        paste_to):
    """Build one overlay mask per image from the distance between the original and processed latents.

    Each entry of ``overlay_images`` is replaced in place by a copy of itself with
    the inverted mask applied. Returns the list of masks, one per overlay image.
    ``paste_to`` is the paste location forwarded to ``proc.uncrop`` (or None).
    """
    import torch
    import numpy as np
    import modules.processing as proc
    import modules.images as images
    from PIL import Image, ImageOps, ImageFilter

    # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control.
    # latent_mask = p.nmask[0].float().cpu()
    # convert the original mask into a form we use to scale distances for thresholding
    # mask_scalar = 1-(torch.clamp(latent_mask, min=0, max=1) ** (p.mask_blend_scale / 2))
    # mask_scalar = mask_scalar / (1.00001-mask_scalar)
    # mask_scalar = mask_scalar.numpy()

    latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1)

    kernel, kernel_center = images.get_gaussian_kernel(stddev_radius=1.5, max_radius=2)

    masks_for_overlay = []

    for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)):
        converted_mask = distance_map.float().cpu().numpy()
        converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center,
                                                          percentile_min=0.9, percentile_max=1, min_width=1)
        converted_mask = images.weighted_histogram_filter(converted_mask, kernel, kernel_center,
                                                          percentile_min=0.25, percentile_max=0.75, min_width=1)

        # The distance at which opacity of original decreases to 50%
        # half_weighted_distance = 1 # * mask_scalar
        # converted_mask = converted_mask / half_weighted_distance

        converted_mask = 1 / (1 + converted_mask ** 2)
        converted_mask = images.smootherstep(converted_mask)
        converted_mask = 1 - converted_mask
        converted_mask = 255. * converted_mask
        converted_mask = converted_mask.astype(np.uint8)
        converted_mask = Image.fromarray(converted_mask)
        converted_mask = images.resize_image(2, converted_mask, width, height)
        converted_mask = proc.create_binary_mask(converted_mask, round=False)

        # Remove aliasing artifacts using a gaussian blur.
        converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))

        # Expand the mask to fit the whole image if needed.
        if paste_to is not None:
            converted_mask = proc.uncrop(converted_mask,
                                         (overlay_image.width, overlay_image.height),
                                         paste_to)

        masks_for_overlay.append(converted_mask)

        image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height))
        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
                           mask=ImageOps.invert(converted_mask.convert('L')))

        overlay_images[i] = image_masked.convert('RGBA')

    return masks_for_overlay


def apply_masks(
        soft_inpainting,
        nmask,
        overlay_images,
        width, height,
        paste_to):
    """Build one overlay mask per image from the first entry of the latent mask ``nmask``.

    Each entry of ``overlay_images`` is replaced in place by a copy of itself with
    the inverted mask applied. Returns the list of masks, one per overlay image.
    ``paste_to`` is the paste location forwarded to ``proc.uncrop`` (or None).
    """
    import torch
    import numpy as np
    import modules.processing as proc
    import modules.images as images
    from PIL import Image, ImageOps, ImageFilter

    base_mask = nmask[0].float()
    base_mask = torch.clamp(base_mask, min=0, max=1).pow_(soft_inpainting.mask_blend_scale / 2)
    base_mask = 255. * base_mask
    base_mask = base_mask.cpu().numpy().astype(np.uint8)
    base_mask = Image.fromarray(base_mask)
    base_mask = images.resize_image(2, base_mask, width, height)
    base_mask = proc.create_binary_mask(base_mask, round=False)

    # Remove aliasing artifacts using a gaussian blur.
    base_mask = base_mask.filter(ImageFilter.GaussianBlur(radius=4))

    masks_for_overlay = []

    for i, overlay_image in enumerate(overlay_images):
        converted_mask = base_mask

        # Expand the mask to fit the whole image if needed. The destination size is
        # the overlay image's own size (as in apply_adaptive_masks) so that the mask
        # always matches the image it is pasted with below.
        if paste_to is not None:
            converted_mask = proc.uncrop(base_mask,
                                         (overlay_image.width, overlay_image.height),
                                         paste_to)

        # The list starts empty, so entries must be appended; assigning by index
        # would raise IndexError on the first image.
        masks_for_overlay.append(converted_mask)

        image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height))
        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
                           mask=ImageOps.invert(converted_mask.convert('L')))

        overlay_images[i] = image_masked.convert('RGBA')

    return masks_for_overlay


# ------------------- Constants -------------------


default = SoftInpaintingSettings(1, 0.5, 4)

enabled_ui_label = "Soft inpainting"
enabled_gen_param_label = "Soft inpainting enabled"
enabled_el_id = "soft_inpainting_enabled"

ui_labels = SoftInpaintingSettings(
    "Schedule bias",
    "Preservation strength",
    "Transition contrast boost")

ui_info = SoftInpaintingSettings(
    "Shifts when preservation of original content occurs during denoising.",
    "How strongly partially masked content should be preserved.",
    "Amplifies the contrast that may be lost in partially masked regions.")

gen_param_labels = SoftInpaintingSettings(
    "Soft inpainting schedule bias",
    "Soft inpainting preservation strength",
    "Soft inpainting transition contrast boost")

el_ids = SoftInpaintingSettings(
    "mask_blend_power",
    "mask_blend_scale",
    "inpaint_detail_preservation")


class Script(scripts.Script):
    """Always-visible img2img script exposing the soft inpainting controls and hooks."""

    def __init__(self):
        # Filled in by post_sample and read back by postprocess_maskoverlay.
        self.masks_for_overlay = None
        self.overlay_images = None

    def title(self):
        return "Soft Inpainting"

    def show(self, is_img2img):
        return scripts.AlwaysVisible if is_img2img else False

    def ui(self, is_img2img):
        """Create the accordion with the three sliders and the help text.

        Returns the list of input components (enabled flag first, then the three
        sliders), or None when not on the img2img tab. Also registers the
        infotext fields and paste field names on ``self``.
        """
        if not is_img2img:
            return

        with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled:
            with gr.Group():
                gr.Markdown(
                    """
                    Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity.
                    **High _Mask blur_** values are recommended!
                    """)

                result = SoftInpaintingSettings(
                    gr.Slider(label=ui_labels.mask_blend_power,
                              info=ui_info.mask_blend_power,
                              minimum=0,
                              maximum=8,
                              step=0.1,
                              value=default.mask_blend_power,
                              elem_id=el_ids.mask_blend_power),
                    gr.Slider(label=ui_labels.mask_blend_scale,
                              info=ui_info.mask_blend_scale,
                              minimum=0,
                              maximum=8,
                              step=0.05,
                              value=default.mask_blend_scale,
                              elem_id=el_ids.mask_blend_scale),
                    gr.Slider(label=ui_labels.inpaint_detail_preservation,
                              info=ui_info.inpaint_detail_preservation,
                              minimum=1,
                              maximum=32,
                              step=0.5,
                              value=default.inpaint_detail_preservation,
                              elem_id=el_ids.inpaint_detail_preservation))

                with gr.Accordion("Help", open=False):
                    gr.Markdown(
                        f"""
                        ### {ui_labels.mask_blend_power}

                        The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas).
                        This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step.
                        This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation.

                        - **Below 1**: Stronger preservation near the end (with low sigma)
                        - **1**: Balanced (proportional to sigma)
                        - **Above 1**: Stronger preservation in the beginning (with high sigma)
                        """)
                    gr.Markdown(
                        f"""
                        ### {ui_labels.mask_blend_scale}

                        Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content.
                        This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength.

                        - **Low values**: Favors generated content.
                        - **High values**: Favors original content.
                        """)
                    gr.Markdown(
                        f"""
                        ### {ui_labels.inpaint_detail_preservation}

                        This parameter controls how the original latent vectors and denoised latent vectors are interpolated.
                        With higher values, the magnitude of the resulting blended vector will be closer to the maximum of the two interpolated vectors.
                        This can prevent the loss of contrast that occurs with linear interpolation.

                        - **Low values**: Softer blending, details may fade.
                        - **High values**: Stronger contrast, may over-saturate colors.
                        """)

        self.infotext_fields = [(soft_inpainting_enabled, enabled_gen_param_label),
                                (result.mask_blend_power, gen_param_labels.mask_blend_power),
                                (result.mask_blend_scale, gen_param_labels.mask_blend_scale),
                                (result.inpaint_detail_preservation, gen_param_labels.inpaint_detail_preservation)]

        self.paste_field_names = [field_name for _, field_name in self.infotext_fields]

        return [soft_inpainting_enabled,
                result.mask_blend_power,
                result.mask_blend_scale,
                result.inpaint_detail_preservation]

    def process(self, p, enabled, power, scale, detail_preservation):
        """When enabled, turn off mask rounding on ``p`` and record the settings
        in ``p.extra_generation_params``."""
        if not enabled:
            return

        # Shut off the rounding it normally does.
        p.mask_round = False

        settings = SoftInpaintingSettings(power, scale, detail_preservation)

        # p.extra_generation_params["Mask rounding"] = False
        settings.add_generation_params(p.extra_generation_params)

    def on_mask_blend(self, p, mba: scripts.MaskBlendArgs, enabled, power, scale, detail_preservation):
        """When enabled, set ``mba.blended_latent``: the current latent unchanged if
        ``mba.sigma`` is None, otherwise the soft blend of init and current latents."""
        if not enabled:
            return

        if mba.sigma is None:
            mba.blended_latent = mba.current_latent
            return

        settings = SoftInpaintingSettings(power, scale, detail_preservation)

        # todo: Why is sigma 2D? Both values are the same.
        mba.blended_latent = latent_blend(settings,
                                          mba.init_latent,
                                          mba.current_latent,
                                          get_modified_nmask(settings, mba.nmask, mba.sigma[0]))

    def post_sample(self, p, ps: scripts.PostSampleArgs, enabled, power, scale, detail_preservation):
        """When enabled, rebuild the overlay images from ``p.init_images`` and compute
        the matching overlay masks, storing both on ``self``."""
        if not enabled:
            return

        settings = SoftInpaintingSettings(power, scale, detail_preservation)

        from modules import images
        from modules.shared import opts

        # since the original code puts holes in the existing overlay images,
        # we have to rebuild them.
        self.overlay_images = []
        for img in p.init_images:

            image = images.flatten(img, opts.img2img_background_color)

            if p.paste_to is None and p.resize_mode != 3:
                image = images.resize_image(p.resize_mode, image, p.width, p.height)

            self.overlay_images.append(image.convert('RGBA'))

        if getattr(ps.samples, 'already_decoded', False):
            self.masks_for_overlay = apply_masks(soft_inpainting=settings,
                                                 nmask=p.nmask,
                                                 overlay_images=self.overlay_images,
                                                 width=p.width,
                                                 height=p.height,
                                                 paste_to=p.paste_to)
        else:
            self.masks_for_overlay = apply_adaptive_masks(latent_orig=p.init_latent,
                                                          latent_processed=ps.samples,
                                                          overlay_images=self.overlay_images,
                                                          width=p.width,
                                                          height=p.height,
                                                          paste_to=p.paste_to)

    def postprocess_maskoverlay(self, p, ppmo: scripts.PostProcessMaskOverlayArgs, enabled, power, scale, detail_preservation):
        """When enabled, replace the mask and overlay image on ``ppmo`` with the ones
        stored by post_sample for index ``ppmo.index``."""
        if not enabled:
            return

        # Both attributes start as None in __init__; leave ppmo untouched rather
        # than fail with a TypeError if post_sample has not populated them.
        if self.masks_for_overlay is None or self.overlay_images is None:
            return

        ppmo.mask_for_overlay = self.masks_for_overlay[ppmo.index]
        ppmo.overlay_image = self.overlay_images[ppmo.index]
--- modules/processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index f8d85bdf..bea01ec6 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -937,8 +937,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: p.scripts.postprocess_image(p, pp) image = pp.image - mask_for_overlay = p.mask_for_overlay - overlay_image = p.overlay_images[i] if p.overlay_images is not None and i < len(p.overlay_images) else None + mask_for_overlay = getattr(p, "mask_for_overlay", None) + overlay_image = p.overlay_images[i] if getattr(p, "overlay_images", None) is not None and i < len(p.overlay_images) else None if p.scripts is not None: ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image) -- cgit v1.2.3