From 269833067de1e7d0b6a6bd65724743d6b88a133f Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 2 Feb 2023 09:37:01 -0500 Subject: instruct-pix2pix support --- modules/sd_samplers_kdiffusion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index aa7f106b..31ee22d3 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -77,9 +77,9 @@ class CFGDenoiser(torch.nn.Module): batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] - x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) - image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) - sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) + x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x]) + sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond] + [image_cond]) denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps) cfg_denoiser_callback(denoiser_params) @@ -88,7 +88,7 @@ class CFGDenoiser(torch.nn.Module): sigma_in = denoiser_params.sigma if tensor.shape[1] == uncond.shape[1]: - cond_in = torch.cat([tensor, uncond]) + cond_in = torch.cat([tensor, uncond, uncond]) if shared.batch_cond_uncond: x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]}) -- cgit v1.2.3 From cf0cfefe910b0de18c4751ce8d8cf7a6053a39b0 Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 2 Feb 2023 19:15:38 -0500 Subject: Revert "instruct-pix2pix support" This reverts commit 269833067de1e7d0b6a6bd65724743d6b88a133f. --- modules/processing.py | 2 +- modules/sd_samplers_kdiffusion.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/processing.py b/modules/processing.py index f299e04d..e544c2e1 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -186,7 +186,7 @@ class StableDiffusionProcessing: return conditioning def edit_image_conditioning(self, source_image): - conditioning_image = self.sd_model.encode_first_stage(source_image).mode() + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image)) return conditioning_image diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 31ee22d3..aa7f106b 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -77,9 +77,9 @@ class CFGDenoiser(torch.nn.Module): batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] - x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x]) - sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) - image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond] + [image_cond]) + x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) + sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps) cfg_denoiser_callback(denoiser_params) @@ -88,7 +88,7 @@ class CFGDenoiser(torch.nn.Module): sigma_in = denoiser_params.sigma if tensor.shape[1] == uncond.shape[1]: - cond_in = torch.cat([tensor, uncond, uncond]) + cond_in = torch.cat([tensor, uncond]) if shared.batch_cond_uncond: x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]}) -- cgit v1.2.3 From 6c6c6636bb123d664999c888cda47a1f8bad635b Mon Sep 17 00:00:00 2001 From: Kyle Date: Fri, 3 Feb 2023 18:19:56 -0500 Subject: Image CFG Added (Full Implementation) Uses separate denoiser for edit (instruct-pix2pix) models No impact to txt2img or regular img2img "Image CFG Scale" will only apply to instruct-pix2pix models and metadata will only be added if using such model --- modules/img2img.py | 3 +- modules/processing.py | 4 +- modules/sd_samplers_kdiffusion.py | 101 +++++++++++++++++++++++++++++++++++--- modules/ui.py | 3 ++ 4 files changed, 103 insertions(+), 8 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/img2img.py b/modules/img2img.py index f813299c..bcc158dc 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -76,7 +76,7 @@ def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args): processed_image.save(os.path.join(output_dir, filename)) -def img2img(id_task: str, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, steps: int, sampler_index: int, mask_blur: int, mask_alpha: float, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, *args): +def img2img(id_task: str, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, steps: int, sampler_index: int, mask_blur: int, mask_alpha: float, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, image_cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, *args): override_settings = create_override_settings_dict(override_settings_texts) is_batch = mode == 5 @@ -142,6 +142,7 @@ def img2img(id_task: str, mode: int, prompt: str, negative_prompt: str, prompt_s inpainting_fill=inpainting_fill, resize_mode=resize_mode, denoising_strength=denoising_strength, + image_cfg_scale=image_cfg_scale, inpaint_full_res=inpaint_full_res, inpaint_full_res_padding=inpaint_full_res_padding, inpainting_mask_invert=inpainting_mask_invert, diff --git a/modules/processing.py b/modules/processing.py index f299e04d..c33694cc 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -445,6 +445,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Steps": p.steps, "Sampler": p.sampler_name, "CFG scale": p.cfg_scale, + "Image CFG scale": getattr(p, 'image_cfg_scale', None), "Seed": all_seeds[index], "Face restoration": (opts.face_restoration_model if p.restore_faces else None), "Size": f"{p.width}x{p.height}", @@ -901,12 +902,13 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): sampler = None - def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs): + def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs): super().__init__(**kwargs) self.init_images = init_images self.resize_mode: int = resize_mode self.denoising_strength: float = denoising_strength + self.image_cfg_scale: float = image_cfg_scale if shared.sd_model.cond_stage_key == "edit" else None self.init_latent = None self.image_mask = mask self.latent_mask = None diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index aa7f106b..a16ba69b 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -1,6 +1,7 @@ from collections import deque import torch import inspect +import einops import k_diffusion.sampling from modules import prompt_parser, devices, sd_samplers_common @@ -40,6 +41,90 @@ sampler_extra_params = { 'sample_dpm_2': ['s_churn', 's_tmin', 's_tmax', 's_noise'], } +class CFGDenoiserEdit(torch.nn.Module): + """ + Classifier free guidance denoiser. A wrapper for stable diffusion model (specifically for unet) + that can take a noisy picture and produce a noise-free picture using two guidances (prompts) + instead of one. Originally, the second prompt is just an empty string, but we use non-empty + negative prompt. + """ + + def __init__(self, model): + super().__init__() + self.inner_model = model + self.mask = None + self.nmask = None + self.init_latent = None + self.step = 0 + + def combine_denoised(self, x_out, conds_list, uncond, cond_scale, image_cfg_scale): + denoised_uncond = x_out[-uncond.shape[0]:] + denoised = torch.clone(denoised_uncond) + + for i, conds in enumerate(conds_list): + for cond_index, weight in conds: + out_cond, out_img_cond, out_uncond = x_out.chunk(3) + denoised[i] = out_uncond[cond_index] + cond_scale * (out_cond[cond_index] - out_img_cond[cond_index]) + image_cfg_scale * (out_img_cond[cond_index] - out_uncond[cond_index]) + + return denoised + + def forward(self, x, sigma, uncond, cond, cond_scale, image_cond, image_cfg_scale): + if state.interrupted or state.skipped: + raise sd_samplers_common.InterruptedException + + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) + uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) + + batch_size = len(conds_list) + repeats = [len(conds_list[i]) for i in range(batch_size)] + + x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x]) + sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond] + [torch.zeros_like(self.init_latent)]) + + denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps) + cfg_denoiser_callback(denoiser_params) + x_in = denoiser_params.x + image_cond_in = denoiser_params.image_cond + sigma_in = denoiser_params.sigma + + if tensor.shape[1] == uncond.shape[1]: + cond_in = torch.cat([tensor, uncond, uncond]) + + if shared.batch_cond_uncond: + x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]}) + else: + x_out = torch.zeros_like(x_in) + for batch_offset in range(0, x_out.shape[0], batch_size): + a = batch_offset + b = a + batch_size + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [cond_in[a:b]], "c_concat": [image_cond_in[a:b]]}) + else: + x_out = torch.zeros_like(x_in) + batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size + for batch_offset in range(0, tensor.shape[0], batch_size): + a = batch_offset + b = min(a + batch_size, tensor.shape[0]) + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": torch.cat([tensor[a:b]], uncond) , "c_concat": [image_cond_in[a:b]]}) + + x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]}) + + devices.test_for_nans(x_out, "unet") + + if opts.live_preview_content == "Prompt": + sd_samplers_common.store_latent(x_out[0:uncond.shape[0]]) + elif opts.live_preview_content == "Negative prompt": + sd_samplers_common.store_latent(x_out[-uncond.shape[0]:]) + + denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale, image_cfg_scale) + + if self.mask is not None: + denoised = self.init_latent * self.mask + self.nmask * denoised + + self.step += 1 + + return denoised + class CFGDenoiser(torch.nn.Module): """ @@ -78,8 +163,8 @@ class CFGDenoiser(torch.nn.Module): repeats = [len(conds_list[i]) for i in range(batch_size)] x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) - image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps) cfg_denoiser_callback(denoiser_params) @@ -160,7 +245,7 @@ class KDiffusionSampler: self.funcname = funcname self.func = getattr(k_diffusion.sampling, self.funcname) self.extra_params = sampler_extra_params.get(funcname, []) - self.model_wrap_cfg = CFGDenoiser(self.model_wrap) + self.model_wrap_cfg = CFGDenoiser(self.model_wrap) if not shared.sd_model.cond_stage_key == "edit" else CFGDenoiserEdit(self.model_wrap) self.sampler_noises = None self.stop_at = None self.eta = None @@ -260,13 +345,17 @@ class KDiffusionSampler: self.model_wrap_cfg.init_latent = x self.last_latent = x - - samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args={ + extra_args={ 'cond': conditioning, 'image_cond': image_conditioning, 'uncond': unconditional_conditioning, - 'cond_scale': p.cfg_scale - }, disable=False, callback=self.callback_state, **extra_params_kwargs)) + 'cond_scale': p.cfg_scale, + } + + if p.image_cfg_scale: + extra_args['image_cfg_scale'] = p.image_cfg_scale + + samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)) return samples diff --git a/modules/ui.py b/modules/ui.py index 5e34fb07..f2f7de8b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -766,6 +766,7 @@ def create_ui(): elif category == "cfg": with FormGroup(): cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="img2img_cfg_scale") + image_cfg_scale = gr.Slider(minimum=0, maximum=3.0, step=0.05, label='Image CFG Scale (for instruct-pix2pix models only)', value=1.5, elem_id="img2img_image_cfg_scale") denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength") elif category == "seed": @@ -861,6 +862,7 @@ def create_ui(): batch_count, batch_size, cfg_scale, + image_cfg_scale, denoising_strength, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox, @@ -947,6 +949,7 @@ def create_ui(): (sampler_index, "Sampler"), (restore_faces, "Face restoration"), (cfg_scale, "CFG scale"), + (image_cfg_scale, "Image CFG scale"), (seed, "Seed"), (width, "Size-1"), (height, "Size-2"), -- cgit v1.2.3 From c27c0de0f73c5f533acfa10426dbac7ac988bc85 Mon Sep 17 00:00:00 2001 From: Kyle Date: Fri, 3 Feb 2023 19:15:32 -0500 Subject: txt2img Hires Fix --- modules/processing.py | 1 + modules/sd_samplers_kdiffusion.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/processing.py b/modules/processing.py index c33694cc..e1b53ac0 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -268,6 +268,7 @@ class Processed: self.height = p.height self.sampler_name = p.sampler_name self.cfg_scale = p.cfg_scale + self.image_cfg_scale = getattr(p, 'image_cfg_scale', None) self.steps = p.steps self.batch_size = p.batch_size self.restore_faces = p.restore_faces diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index a16ba69b..6107e99e 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -352,7 +352,7 @@ class KDiffusionSampler: 'cond_scale': p.cfg_scale, } - if p.image_cfg_scale: + if hasattr(p, 'image_cfg_scale'): extra_args['image_cfg_scale'] = p.image_cfg_scale samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)) -- cgit v1.2.3 From ba6a4e7e9431d02ba3656c6ae44d5dfe29908d68 Mon Sep 17 00:00:00 2001 From: Kyle Date: Fri, 3 Feb 2023 19:46:13 -0500 Subject: Use original CFGDenoiser if image_cfg_scale = 1 If image_cfg_scale is =1 then the original image is not used for the output. We can then use the original CFGDenoiser to get the same result to support AND functionality. Maybe in the future AND can be supported with "Image CFG Scale" --- modules/sd_samplers_kdiffusion.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 6107e99e..6c57fdec 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -245,7 +245,7 @@ class KDiffusionSampler: self.funcname = funcname self.func = getattr(k_diffusion.sampling, self.funcname) self.extra_params = sampler_extra_params.get(funcname, []) - self.model_wrap_cfg = CFGDenoiser(self.model_wrap) if not shared.sd_model.cond_stage_key == "edit" else CFGDenoiserEdit(self.model_wrap) + self.model_wrap_cfg = CFGDenoiser(self.model_wrap) self.sampler_noises = None self.stop_at = None self.eta = None @@ -280,6 +280,9 @@ class KDiffusionSampler: return p.steps def initialize(self, p): + if shared.sd_model.cond_stage_key == "edit" and getattr(p, 'image_cfg_scale', None) != 1: + self.model_wrap_cfg = CFGDenoiserEdit(self.model_wrap) + self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None self.model_wrap_cfg.step = 0 @@ -352,7 +355,7 @@ class KDiffusionSampler: 'cond_scale': p.cfg_scale, } - if hasattr(p, 'image_cfg_scale'): + if hasattr(p, 'image_cfg_scale') and p.image_cfg_scale != 1 and p.image_cfg_scale != None: extra_args['image_cfg_scale'] = p.image_cfg_scale samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)) -- cgit v1.2.3 From 72dd5785d9721b95e8d61210a56be8f6c6b1e97d Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 4 Feb 2023 11:06:17 +0300 Subject: merge CFGDenoiserEdit and CFGDenoiser into single object --- modules/sd_samplers_kdiffusion.py | 133 +++++++++++--------------------------- 1 file changed, 37 insertions(+), 96 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 6c57fdec..f076fc55 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -41,90 +41,6 @@ sampler_extra_params = { 'sample_dpm_2': ['s_churn', 's_tmin', 's_tmax', 's_noise'], } -class CFGDenoiserEdit(torch.nn.Module): - """ - Classifier free guidance denoiser. A wrapper for stable diffusion model (specifically for unet) - that can take a noisy picture and produce a noise-free picture using two guidances (prompts) - instead of one. Originally, the second prompt is just an empty string, but we use non-empty - negative prompt. - """ - - def __init__(self, model): - super().__init__() - self.inner_model = model - self.mask = None - self.nmask = None - self.init_latent = None - self.step = 0 - - def combine_denoised(self, x_out, conds_list, uncond, cond_scale, image_cfg_scale): - denoised_uncond = x_out[-uncond.shape[0]:] - denoised = torch.clone(denoised_uncond) - - for i, conds in enumerate(conds_list): - for cond_index, weight in conds: - out_cond, out_img_cond, out_uncond = x_out.chunk(3) - denoised[i] = out_uncond[cond_index] + cond_scale * (out_cond[cond_index] - out_img_cond[cond_index]) + image_cfg_scale * (out_img_cond[cond_index] - out_uncond[cond_index]) - - return denoised - - def forward(self, x, sigma, uncond, cond, cond_scale, image_cond, image_cfg_scale): - if state.interrupted or state.skipped: - raise sd_samplers_common.InterruptedException - - conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) - uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) - - batch_size = len(conds_list) - repeats = [len(conds_list[i]) for i in range(batch_size)] - - x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x]) - sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) - image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond] + [torch.zeros_like(self.init_latent)]) - - denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps) - cfg_denoiser_callback(denoiser_params) - x_in = denoiser_params.x - image_cond_in = denoiser_params.image_cond - sigma_in = denoiser_params.sigma - - if tensor.shape[1] == uncond.shape[1]: - cond_in = torch.cat([tensor, uncond, uncond]) - - if shared.batch_cond_uncond: - x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]}) - else: - x_out = torch.zeros_like(x_in) - for batch_offset in range(0, x_out.shape[0], batch_size): - a = batch_offset - b = a + batch_size - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [cond_in[a:b]], "c_concat": [image_cond_in[a:b]]}) - else: - x_out = torch.zeros_like(x_in) - batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size - for batch_offset in range(0, tensor.shape[0], batch_size): - a = batch_offset - b = min(a + batch_size, tensor.shape[0]) - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": torch.cat([tensor[a:b]], uncond) , "c_concat": [image_cond_in[a:b]]}) - - x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]}) - - devices.test_for_nans(x_out, "unet") - - if opts.live_preview_content == "Prompt": - sd_samplers_common.store_latent(x_out[0:uncond.shape[0]]) - elif opts.live_preview_content == "Negative prompt": - sd_samplers_common.store_latent(x_out[-uncond.shape[0]:]) - - denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale, image_cfg_scale) - - if self.mask is not None: - denoised = self.init_latent * self.mask + self.nmask * denoised - - self.step += 1 - - return denoised - class CFGDenoiser(torch.nn.Module): """ @@ -141,6 +57,7 @@ class CFGDenoiser(torch.nn.Module): self.nmask = None self.init_latent = None self.step = 0 + self.image_cfg_scale = None def combine_denoised(self, x_out, conds_list, uncond, cond_scale): denoised_uncond = x_out[-uncond.shape[0]:] @@ -152,19 +69,36 @@ class CFGDenoiser(torch.nn.Module): return denoised + def combine_denoised_for_edit_model(self, x_out, cond_scale): + out_cond, out_img_cond, out_uncond = x_out.chunk(3) + denoised = out_uncond + cond_scale * (out_cond - out_img_cond) + self.image_cfg_scale * (out_img_cond - out_uncond) + + return denoised + def forward(self, x, sigma, uncond, cond, cond_scale, image_cond): if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException + # at self.image_cfg_scale == 1.0 produced results for edit model are the same as with normal sampling, + # so is_edit_model is set to False to support AND composition. + is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0 + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) + assert not is_edit_model or all([len(conds) == 1 for conds in conds_list]), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)" + batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] - x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) - sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) - image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) + if not is_edit_model: + x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) + sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) + else: + x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x]) + sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond] + [torch.zeros_like(self.init_latent)]) denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps) cfg_denoiser_callback(denoiser_params) @@ -173,7 +107,10 @@ class CFGDenoiser(torch.nn.Module): sigma_in = denoiser_params.sigma if tensor.shape[1] == uncond.shape[1]: - cond_in = torch.cat([tensor, uncond]) + if not is_edit_model: + cond_in = torch.cat([tensor, uncond]) + else: + cond_in = torch.cat([tensor, uncond, uncond]) if shared.batch_cond_uncond: x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]}) @@ -189,7 +126,13 @@ class CFGDenoiser(torch.nn.Module): for batch_offset in range(0, tensor.shape[0], batch_size): a = batch_offset b = min(a + batch_size, tensor.shape[0]) - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [tensor[a:b]], "c_concat": [image_cond_in[a:b]]}) + + if not is_edit_model: + c_crossattn = [tensor[a:b]] + else: + c_crossattn = torch.cat([tensor[a:b]], uncond) + + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": c_crossattn, "c_concat": [image_cond_in[a:b]]}) x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]}) @@ -200,7 +143,10 @@ class CFGDenoiser(torch.nn.Module): elif opts.live_preview_content == "Negative prompt": sd_samplers_common.store_latent(x_out[-uncond.shape[0]:]) - denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale) + if not is_edit_model: + denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale) + else: + denoised = self.combine_denoised_for_edit_model(x_out, cond_scale) if self.mask is not None: denoised = self.init_latent * self.mask + self.nmask * denoised @@ -280,12 +226,10 @@ class KDiffusionSampler: return p.steps def initialize(self, p): - if shared.sd_model.cond_stage_key == "edit" and getattr(p, 'image_cfg_scale', None) != 1: - self.model_wrap_cfg = CFGDenoiserEdit(self.model_wrap) - self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None self.model_wrap_cfg.step = 0 + self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None) self.eta = p.eta if p.eta is not None else opts.eta_ancestral k_diffusion.sampling.torch = TorchHijack(self.sampler_noises if self.sampler_noises is not None else []) @@ -355,9 +299,6 @@ class KDiffusionSampler: 'cond_scale': p.cfg_scale, } - if hasattr(p, 'image_cfg_scale') and p.image_cfg_scale != 1 and p.image_cfg_scale != None: - extra_args['image_cfg_scale'] = p.image_cfg_scale - samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)) return samples -- cgit v1.2.3 From 9e27af76d14dc6d8a5062ab9c0db128a917ada17 Mon Sep 17 00:00:00 2001 From: RcINS Date: Sat, 11 Feb 2023 10:12:16 +0800 Subject: Fix DPM++ SDE not deterministic across different batch sizes (#5210) --- modules/sd_samplers_kdiffusion.py | 37 +++++++++++++++++++++++++++++-------- modules/shared.py | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index f076fc55..d143d41e 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -269,6 +269,15 @@ class KDiffusionSampler: return sigmas + def create_noise_sampler(self, x, sigmas, seeds): + """For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes""" + if shared.opts.no_dpmpp_sde_batch_determinism: + return None + + from k_diffusion.sampling import BrownianTreeNoiseSampler + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seeds) + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps) @@ -278,18 +287,24 @@ class KDiffusionSampler: xi = x + noise * sigma_sched[0] extra_params_kwargs = self.initialize(p) - if 'sigma_min' in inspect.signature(self.func).parameters: + parameters = inspect.signature(self.func).parameters + + if 'sigma_min' in parameters: ## last sigma is zero which isn't allowed by DPM Fast & Adaptive so taking value before last extra_params_kwargs['sigma_min'] = sigma_sched[-2] - if 'sigma_max' in inspect.signature(self.func).parameters: + if 'sigma_max' in parameters: extra_params_kwargs['sigma_max'] = sigma_sched[0] - if 'n' in inspect.signature(self.func).parameters: + if 'n' in parameters: extra_params_kwargs['n'] = len(sigma_sched) - 1 - if 'sigma_sched' in inspect.signature(self.func).parameters: + if 'sigma_sched' in parameters: extra_params_kwargs['sigma_sched'] = sigma_sched - if 'sigmas' in inspect.signature(self.func).parameters: + if 'sigmas' in parameters: extra_params_kwargs['sigmas'] = sigma_sched + if self.funcname == 'sample_dpmpp_sde': + noise_sampler = self.create_noise_sampler(x, sigmas, p.all_seeds) + extra_params_kwargs['noise_sampler'] = noise_sampler + self.model_wrap_cfg.init_latent = x self.last_latent = x extra_args={ @@ -303,7 +318,7 @@ class KDiffusionSampler: return samples - def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning = None): + def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps = steps or p.steps sigmas = self.get_sigmas(p, steps) @@ -311,14 +326,20 @@ class KDiffusionSampler: x = x * sigmas[0] extra_params_kwargs = self.initialize(p) - if 'sigma_min' in inspect.signature(self.func).parameters: + parameters = inspect.signature(self.func).parameters + + if 'sigma_min' in parameters: extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item() extra_params_kwargs['sigma_max'] = self.model_wrap.sigmas[-1].item() - if 'n' in inspect.signature(self.func).parameters: + if 'n' in parameters: extra_params_kwargs['n'] = steps else: extra_params_kwargs['sigmas'] = sigmas + if self.funcname == 'sample_dpmpp_sde': + noise_sampler = self.create_noise_sampler(x, sigmas, p.all_seeds) + extra_params_kwargs['noise_sampler'] = noise_sampler + self.last_latent = x samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={ 'cond': conditioning, diff --git a/modules/shared.py b/modules/shared.py index 79fbf724..22344431 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -414,6 +414,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { options_templates.update(options_section(('compatibility', "Compatibility"), { "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."), "use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."), + "no_dpmpp_sde_batch_determinism": OptionInfo(False, "Do not make DPM++ SDE deterministic across different batch sizes."), "use_old_hires_fix_width_height": OptionInfo(False, "For hires fix, use width/height sliders to set final resolution rather than first pass (disables Upscale by, Resize width/height to)."), })) -- cgit v1.2.3 From b78c5e87baaf8c88d039bf60082c3b5ae35ec4ff Mon Sep 17 00:00:00 2001 From: opparco Date: Sat, 11 Feb 2023 11:18:38 +0900 Subject: Add cfg_denoised_callback --- modules/script_callbacks.py | 29 +++++++++++++++++++++++++++++ modules/sd_samplers_kdiffusion.py | 4 ++++ 2 files changed, 33 insertions(+) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index 4bb45ec7..edd0e2a7 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -46,6 +46,18 @@ class CFGDenoiserParams: """Total number of sampling steps planned""" +class CFGDenoisedParams: + def __init__(self, x, sampling_step, total_sampling_steps): + self.x = x + """Latent image representation in the process of being denoised""" + + self.sampling_step = sampling_step + """Current Sampling step number""" + + self.total_sampling_steps = total_sampling_steps + """Total number of sampling steps planned""" + + class UiTrainTabParams: def __init__(self, txt2img_preview_params): self.txt2img_preview_params = txt2img_preview_params @@ -68,6 +80,7 @@ callback_map = dict( callbacks_before_image_saved=[], callbacks_image_saved=[], callbacks_cfg_denoiser=[], + callbacks_cfg_denoised=[], callbacks_before_component=[], callbacks_after_component=[], callbacks_image_grid=[], @@ -150,6 +163,14 @@ def cfg_denoiser_callback(params: CFGDenoiserParams): report_exception(c, 'cfg_denoiser_callback') +def cfg_denoised_callback(params: CFGDenoisedParams): + for c in callback_map['callbacks_cfg_denoised']: + try: + c.callback(params) + except Exception: + report_exception(c, 'cfg_denoised_callback') + + def before_component_callback(component, **kwargs): for c in callback_map['callbacks_before_component']: try: @@ -283,6 +304,14 @@ def on_cfg_denoiser(callback): add_callback(callback_map['callbacks_cfg_denoiser'], callback) +def on_cfg_denoised(callback): + """register a function to be called in the kdiffussion cfg_denoiser method after building the inner model inputs. + The callback is called with one argument: + - params: CFGDenoisedParams - parameters to be passed to the inner model and sampling state details. + """ + add_callback(callback_map['callbacks_cfg_denoised'], callback) + + def on_before_component(callback): """register a function to be called before a component is created. The callback is called with arguments: diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index f076fc55..28847397 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -8,6 +8,7 @@ from modules import prompt_parser, devices, sd_samplers_common from modules.shared import opts, state import modules.shared as shared from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback +from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback samplers_k_diffusion = [ ('Euler a', 'sample_euler_ancestral', ['k_euler_a', 'k_euler_ancestral'], {}), @@ -136,6 +137,9 @@ class CFGDenoiser(torch.nn.Module): x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]}) + denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps) + cfg_denoised_callback(denoised_params) + devices.test_for_nans(x_out, "unet") if opts.live_preview_content == "Prompt": -- cgit v1.2.3 From f55a7e04d812e8cb07d622efb321abbad54d2d4a Mon Sep 17 00:00:00 2001 From: RcINS Date: Wed, 15 Feb 2023 16:57:18 +0800 Subject: Fix error when batch count > 1 --- modules/sd_samplers_kdiffusion.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'modules/sd_samplers_kdiffusion.py') diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index d143d41e..86d657e2 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -269,14 +269,15 @@ class KDiffusionSampler: return sigmas - def create_noise_sampler(self, x, sigmas, seeds): + def create_noise_sampler(self, x, sigmas, p): """For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes""" if shared.opts.no_dpmpp_sde_batch_determinism: return None from k_diffusion.sampling import BrownianTreeNoiseSampler sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() - return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seeds) + current_iter_seeds = p.all_seeds[p.iteration * p.batch_size:(p.iteration + 1) * p.batch_size] + return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=current_iter_seeds) def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps) @@ -302,7 +303,7 @@ class KDiffusionSampler: extra_params_kwargs['sigmas'] = sigma_sched if self.funcname == 'sample_dpmpp_sde': - noise_sampler = self.create_noise_sampler(x, sigmas, p.all_seeds) + noise_sampler = self.create_noise_sampler(x, sigmas, p) extra_params_kwargs['noise_sampler'] = noise_sampler self.model_wrap_cfg.init_latent = x @@ -337,7 +338,7 @@ class KDiffusionSampler: extra_params_kwargs['sigmas'] = sigmas if self.funcname == 'sample_dpmpp_sde': - noise_sampler = self.create_noise_sampler(x, sigmas, p.all_seeds) + noise_sampler = self.create_noise_sampler(x, sigmas, p) extra_params_kwargs['noise_sampler'] = noise_sampler self.last_latent = x -- cgit v1.2.3