From cbf15edbf90a68a08eeab40af5df577ba4ac90b6 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 18 Oct 2022 17:23:38 +0300 Subject: remove dependence on TQDM for sampler progress/interrupt functionality --- modules/sd_samplers.py | 107 +++++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 49 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 20309e06..b58e810b 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -98,25 +98,8 @@ def store_latent(decoded): shared.state.current_image = sample_to_image(decoded) - -def extended_tdqm(sequence, *args, desc=None, **kwargs): - state.sampling_steps = len(sequence) - state.sampling_step = 0 - - seq = sequence if cmd_opts.disable_console_progressbars else tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs) - - for x in seq: - if state.interrupted or state.skipped: - break - - yield x - - state.sampling_step += 1 - shared.total_tqdm.update() - - -ldm.models.diffusion.ddim.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs) -ldm.models.diffusion.plms.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs) +class InterruptedException(BaseException): + pass class VanillaStableDiffusionSampler: @@ -128,14 +111,32 @@ class VanillaStableDiffusionSampler: self.init_latent = None self.sampler_noises = None self.step = 0 + self.stop_at = None self.eta = None self.default_eta = 0.0 self.config = None + self.last_latent = None def number_of_needed_noises(self, p): return 0 + def launch_sampling(self, steps, func): + state.sampling_steps = steps + state.sampling_step = 0 + + try: + return func() + except InterruptedException: + return self.last_latent + def p_sample_ddim_hook(self, x_dec, cond, ts, unconditional_conditioning, *args, **kwargs): + if state.interrupted or state.skipped: + raise InterruptedException + + if self.stop_at is not None and self.step > self.stop_at: + raise InterruptedException + + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) @@ -159,11 +160,16 @@ class VanillaStableDiffusionSampler: res = self.orig_p_sample_ddim(x_dec, cond, ts, unconditional_conditioning=unconditional_conditioning, *args, **kwargs) if self.mask is not None: - store_latent(self.init_latent * self.mask + self.nmask * res[1]) + self.last_latent = self.init_latent * self.mask + self.nmask * res[1] else: - store_latent(res[1]) + self.last_latent = res[1] + + store_latent(self.last_latent) self.step += 1 + state.sampling_step = self.step + shared.total_tqdm.update() + return res def initialize(self, p): @@ -192,7 +198,7 @@ class VanillaStableDiffusionSampler: self.init_latent = x self.step = 0 - samples = self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning) + samples = self.launch_sampling(steps, lambda: self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning)) return samples @@ -206,9 +212,9 @@ class VanillaStableDiffusionSampler: # existing code fails with certain step counts, like 9 try: - samples_ddim, _ = self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, 
unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta) + samples_ddim = self.launch_sampling(steps, lambda: self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)[0]) except Exception: - samples_ddim, _ = self.sampler.sample(S=steps+1, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta) + samples_ddim = self.launch_sampling(steps, lambda: self.sampler.sample(S=steps+1, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)[0]) return samples_ddim @@ -223,6 +229,9 @@ class CFGDenoiser(torch.nn.Module): self.step = 0 def forward(self, x, sigma, uncond, cond, cond_scale): + if state.interrupted or state.skipped: + raise InterruptedException + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) @@ -268,25 +277,6 @@ class CFGDenoiser(torch.nn.Module): return denoised -def extended_trange(sampler, count, *args, **kwargs): - state.sampling_steps = count - state.sampling_step = 0 - - seq = range(count) if cmd_opts.disable_console_progressbars else tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs) - - for x in seq: - if state.interrupted or state.skipped: - break - - if sampler.stop_at is not None and x > sampler.stop_at: - break - - yield x - - state.sampling_step += 1 - shared.total_tqdm.update() - - class TorchHijack: def __init__(self, kdiff_sampler): self.kdiff_sampler = kdiff_sampler @@ -314,9 +304,28 @@ class KDiffusionSampler: self.eta = None self.default_eta = 1.0 self.config = None + self.last_latent = None def callback_state(self, d): - store_latent(d["denoised"]) + step = d['i'] + latent = d["denoised"] + store_latent(latent) + self.last_latent = latent + + if self.stop_at is not None and step > self.stop_at: + raise InterruptedException + + state.sampling_step = step + shared.total_tqdm.update() + + def launch_sampling(self, steps, func): + state.sampling_steps = steps + state.sampling_step = 0 + + try: + return func() + except InterruptedException: + return self.last_latent def number_of_needed_noises(self, p): return p.steps @@ -339,9 +348,6 @@ class KDiffusionSampler: self.sampler_noise_index = 0 self.eta = p.eta or opts.eta_ancestral - if hasattr(k_diffusion.sampling, 'trange'): - k_diffusion.sampling.trange = lambda *args, **kwargs: extended_trange(self, *args, **kwargs) - if self.sampler_noises is not None: k_diffusion.sampling.torch = TorchHijack(self) @@ -383,8 +389,9 @@ class KDiffusionSampler: self.model_wrap_cfg.init_latent = x - return self.func(self.model_wrap_cfg, xi, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs) + samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, xi, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)) + return samples def 
sample(self, p, x, conditioning, unconditional_conditioning, steps=None): steps = steps or p.steps @@ -406,6 +413,8 @@ class KDiffusionSampler: extra_params_kwargs['n'] = steps else: extra_params_kwargs['sigmas'] = sigmas - samples = self.func(self.model_wrap_cfg, x, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs) + + samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)) + return samples -- cgit v1.2.3 From 8e7097d06a6a261580d34375c9d2a9e4ffc63ffa Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Wed, 19 Oct 2022 13:47:45 -0700 Subject: Added support for RunwayML inpainting model --- modules/processing.py | 34 ++++++- modules/sd_hijack_inpainting.py | 208 ++++++++++++++++++++++++++++++++++++++++ modules/sd_models.py | 16 +++- modules/sd_samplers.py | 50 +++++++--- 4 files changed, 293 insertions(+), 15 deletions(-) create mode 100644 modules/sd_hijack_inpainting.py (limited to 'modules/sd_samplers.py') diff --git a/modules/processing.py b/modules/processing.py index bcb0c32c..a6c308f9 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -546,7 +546,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): if not self.enable_hr: x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) + + # The "masked-image" in this case will just be all zeros since the entire image is masked. + image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device) + image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) + + # Add the fake full 1s mask to the first dimension. + image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) + image_conditioning = image_conditioning.to(x.dtype) + + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=image_conditioning) return samples x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) @@ -714,10 +723,31 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask + if self.image_mask is not None: + conditioning_mask = np.array(self.image_mask.convert("L")) + conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 + conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + + # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 + conditioning_mask = torch.round(conditioning_mask) + else: + conditioning_mask = torch.ones(1, 1, *image.shape[-2:]) + + # Create another latent image, this time with a masked version of the original input. 
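Stepping back briefly: the first patch in this series replaces the tqdm-based progress/interrupt plumbing with an exception that unwinds out of the sampler's internal loop. A minimal sketch of that control flow, with simplified names standing in for the webui's actual classes:

class InterruptedException(BaseException):
    # Deliberately derived from BaseException rather than Exception, so that
    # broad `except Exception` handlers inside third-party sampler code
    # cannot swallow the interrupt on its way out.
    pass

class SamplerShim:
    def __init__(self):
        self.last_latent = None  # newest denoised latent seen by the callback
        self.stop_at = None      # optional early-stop step

    def step_callback(self, step, latent):
        # Invoked once per denoising step (the DDIM hook or the k-diffusion
        # callback in the patch above).
        self.last_latent = latent
        if self.stop_at is not None and step > self.stop_at:
            raise InterruptedException

    def launch_sampling(self, func):
        # An interrupt leaves a partially denoised latent, which is still a
        # usable image, so return it instead of failing.
        try:
            return func()
        except InterruptedException:
            return self.last_latent
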
+ conditioning_mask = conditioning_mask.to(image.device) + conditioning_image = image * (1.0 - conditioning_mask) + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) + + # Create the concatenated conditioning tensor to be fed to `c_concat` + conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:]) + conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1) + self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1) + self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning) + samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) if self.mask is not None: samples = samples * self.nmask + self.init_latent * self.mask diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py new file mode 100644 index 00000000..7e5670d6 --- /dev/null +++ b/modules/sd_hijack_inpainting.py @@ -0,0 +1,208 @@ +import torch +import numpy as np + +from tqdm import tqdm +from einops import rearrange, repeat +from omegaconf import ListConfig + +from types import MethodType + +import ldm.models.diffusion.ddpm +import ldm.models.diffusion.ddim + +from ldm.models.diffusion.ddpm import LatentDiffusion +from ldm.models.diffusion.ddim import DDIMSampler, noise_like + +# ================================================================================================= +# Monkey patch DDIMSampler methods from RunwayML repo directly. +# Adapted from: +# https://github.com/runwayml/stable-diffusion/blob/main/ldm/models/diffusion/ddim.py +# ================================================================================================= +@torch.no_grad() +def sample( + self, + S, + batch_size, + shape, + conditioning=None, + callback=None, + normals_sequence=None, + img_callback=None, + quantize_x0=False, + eta=0., + mask=None, + x0=None, + temperature=1., + noise_dropout=0., + score_corrector=None, + corrector_kwargs=None, + verbose=True, + x_T=None, + log_every_t=100, + unconditional_guidance_scale=1., + unconditional_conditioning=None, + # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 
+ **kwargs + ): + if conditioning is not None: + if isinstance(conditioning, dict): + ctmp = conditioning[list(conditioning.keys())[0]] + while isinstance(ctmp, list): + ctmp = ctmp[0] + cbs = ctmp.shape[0] + if cbs != batch_size: + print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") + else: + if conditioning.shape[0] != batch_size: + print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") + + self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose) + # sampling + C, H, W = shape + size = (batch_size, C, H, W) + print(f'Data shape for DDIM sampling is {size}, eta {eta}') + + samples, intermediates = self.ddim_sampling(conditioning, size, + callback=callback, + img_callback=img_callback, + quantize_denoised=quantize_x0, + mask=mask, x0=x0, + ddim_use_original_steps=False, + noise_dropout=noise_dropout, + temperature=temperature, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + x_T=x_T, + log_every_t=log_every_t, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + ) + return samples, intermediates + + +@torch.no_grad() +def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, + unconditional_guidance_scale=1., unconditional_conditioning=None): + b, *_, device = *x.shape, x.device + + if unconditional_conditioning is None or unconditional_guidance_scale == 1.: + e_t = self.model.apply_model(x, t, c) + else: + x_in = torch.cat([x] * 2) + t_in = torch.cat([t] * 2) + if isinstance(c, dict): + assert isinstance(unconditional_conditioning, dict) + c_in = dict() + for k in c: + if isinstance(c[k], list): + c_in[k] = [ + torch.cat([unconditional_conditioning[k][i], c[k][i]]) + for i in range(len(c[k])) + ] + else: + c_in[k] =
torch.cat([unconditional_conditioning[k], c[k]]) + else: + c_in = torch.cat([unconditional_conditioning, c]) + e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) + e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) + + if score_corrector is not None: + assert self.model.parameterization == "eps" + e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) + + alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas + alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev + sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas + sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas + # select parameters corresponding to the currently considered timestep + a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) + a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) + sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) + sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device) + + # current prediction for x_0 + pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + if quantize_denoised: + pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) + # direction pointing to x_t + dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t + noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise + return x_prev, pred_x0 + + +# ================================================================================================= +# Monkey patch LatentInpaintDiffusion to load the checkpoint with a proper config. +# Adapted from: +# https://github.com/runwayml/stable-diffusion/blob/main/ldm/models/diffusion/ddpm.py +# ================================================================================================= + +@torch.no_grad() +def get_unconditional_conditioning(self, batch_size, null_label=None): + if null_label is not None: + xc = null_label + if isinstance(xc, ListConfig): + xc = list(xc) + if isinstance(xc, dict) or isinstance(xc, list): + c = self.get_learned_conditioning(xc) + else: + if hasattr(xc, "to"): + xc = xc.to(self.device) + c = self.get_learned_conditioning(xc) + else: + # todo: get null label from cond_stage_model + raise NotImplementedError() + c = repeat(c, "1 ... 
-> b ...", b=batch_size).to(self.device) + return c + +class LatentInpaintDiffusion(LatentDiffusion): + def __init__( + self, + concat_keys=("mask", "masked_image"), + masked_image_key="masked_image", + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.masked_image_key = masked_image_key + assert self.masked_image_key in concat_keys + self.concat_keys = concat_keys + +def should_hijack_inpainting(checkpoint_info): + return str(checkpoint_info.filename).endswith("inpainting.ckpt") and not checkpoint_info.config.endswith("inpainting.yaml") + +def do_inpainting_hijack(): + ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning + ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion + ldm.models.diffusion.ddim.DDIMSampler.p_sample_ddim = p_sample_ddim + ldm.models.diffusion.ddim.DDIMSampler.sample = sample \ No newline at end of file diff --git a/modules/sd_models.py b/modules/sd_models.py index eae22e87..47836d25 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -9,6 +9,7 @@ from ldm.util import instantiate_from_config from modules import shared, modelloader, devices from modules.paths import models_path +from modules.sd_hijack_inpainting import do_inpainting_hijack, should_hijack_inpainting model_dir = "Stable-diffusion" model_path = os.path.abspath(os.path.join(models_path, model_dir)) @@ -211,6 +212,19 @@ def load_model(): print(f"Loading config from: {checkpoint_info.config}") sd_config = OmegaConf.load(checkpoint_info.config) + + if should_hijack_inpainting(checkpoint_info): + do_inpainting_hijack() + + # Hardcoded config for now... + sd_config.model.target = "ldm.models.diffusion.ddpm.LatentInpaintDiffusion" + sd_config.model.params.use_ema = False + sd_config.model.params.conditioning_key = "hybrid" + sd_config.model.params.unet_config.params.in_channels = 9 + + # Create a "fake" config with a different name so that we know to unload it when switching models. 
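The hardcoded `in_channels = 9` above follows directly from the hybrid conditioning layout: the UNet receives the 4-channel noised latent concatenated with a 1-channel mask and the 4-channel latent of the masked image (4 + 1 + 4 = 9). A shape-only sketch, all sizes hypothetical:

import torch

b, h, w = 2, 64, 64                      # latent-space batch and resolution
x = torch.randn(b, 4, h, w)              # noised latent being denoised
mask = torch.ones(b, 1, h, w)            # 1.0 = repaint, rounded to {0, 1}
masked_latent = torch.randn(b, 4, h, w)  # VAE encoding of image * (1 - mask)

c_concat = torch.cat([mask, masked_latent], dim=1)  # [b, 5, h, w]
unet_input = torch.cat([x, c_concat], dim=1)        # [b, 9, h, w]
assert unet_input.shape[1] == 9
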
+ checkpoint_info = checkpoint_info._replace(config=checkpoint_info.config.replace(".yaml", "-inpainting.yaml")) + sd_model = instantiate_from_config(sd_config.model) load_model_weights(sd_model, checkpoint_info) @@ -234,7 +248,7 @@ def reload_model_weights(sd_model, info=None): if sd_model.sd_model_checkpoint == checkpoint_info.filename: return - if sd_model.sd_checkpoint_info.config != checkpoint_info.config: + if sd_model.sd_checkpoint_info.config != checkpoint_info.config or should_hijack_inpainting(checkpoint_info) != should_hijack_inpainting(sd_model.sd_checkpoint_info): checkpoints_loaded.clear() shared.sd_model = load_model() return shared.sd_model diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index b58e810b..9d3cf289 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -136,9 +136,15 @@ class VanillaStableDiffusionSampler: if self.stop_at is not None and self.step > self.stop_at: raise InterruptedException + # Have to unwrap the inpainting conditioning here to perform pre-preocessing + image_conditioning = None + if isinstance(cond, dict): + image_conditioning = cond["c_concat"][0] + cond = cond["c_crossattn"][0] + unconditional_conditioning = unconditional_conditioning["c_crossattn"][0] conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) - unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) + unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers' cond = tensor @@ -157,6 +163,10 @@ class VanillaStableDiffusionSampler: img_orig = self.sampler.model.q_sample(self.init_latent, ts) x_dec = img_orig * self.mask + self.nmask * x_dec + if image_conditioning is not None: + cond = {"c_concat": [image_conditioning], "c_crossattn": [cond]} + unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]} + res = self.orig_p_sample_ddim(x_dec, cond, ts, unconditional_conditioning=unconditional_conditioning, *args, **kwargs) if self.mask is not None: @@ -182,7 +192,7 @@ class VanillaStableDiffusionSampler: self.mask = p.mask if hasattr(p, 'mask') else None self.nmask = p.nmask if hasattr(p, 'nmask') else None - def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None): + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps, t_enc = setup_img2img_steps(p, steps) self.initialize(p) @@ -202,7 +212,7 @@ class VanillaStableDiffusionSampler: return samples - def sample(self, p, x, conditioning, unconditional_conditioning, steps=None): + def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): self.initialize(p) self.init_latent = None @@ -210,6 +220,11 @@ class VanillaStableDiffusionSampler: steps = steps or p.steps + # Wrap the conditioning models with additional image conditioning for inpainting model + if image_conditioning is not None: + conditioning = {"c_concat": [image_conditioning], "c_crossattn": [conditioning]} + unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]} + # existing code fails with certain step counts, like 9 try: samples_ddim = self.launch_sampling(steps, lambda: self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), 
shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)[0]) @@ -228,7 +243,7 @@ class CFGDenoiser(torch.nn.Module): self.init_latent = None self.step = 0 - def forward(self, x, sigma, uncond, cond, cond_scale): + def forward(self, x, sigma, uncond, cond, cond_scale, image_cond): if state.interrupted or state.skipped: raise InterruptedException @@ -239,28 +254,29 @@ class CFGDenoiser(torch.nn.Module): repeats = [len(conds_list[i]) for i in range(batch_size)] x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x]) + image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond]) sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma]) if tensor.shape[1] == uncond.shape[1]: cond_in = torch.cat([tensor, uncond]) if shared.batch_cond_uncond: - x_out = self.inner_model(x_in, sigma_in, cond=cond_in) + x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]}) else: x_out = torch.zeros_like(x_in) for batch_offset in range(0, x_out.shape[0], batch_size): a = batch_offset b = a + batch_size - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b]) + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [cond_in[a:b]], "c_concat": [image_cond_in[a:b]]}) else: x_out = torch.zeros_like(x_in) batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size for batch_offset in range(0, tensor.shape[0], batch_size): a = batch_offset b = min(a + batch_size, tensor.shape[0]) - x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=tensor[a:b]) + x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [tensor[a:b]], "c_concat": [image_cond_in[a:b]]}) - x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=uncond) + x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]}) denoised_uncond = x_out[-uncond.shape[0]:] denoised = torch.clone(denoised_uncond) @@ -361,7 +377,7 @@ class KDiffusionSampler: return extra_params_kwargs - def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None): + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps, t_enc = setup_img2img_steps(p, steps) if p.sampler_noise_scheduler_override: @@ -389,11 +405,16 @@ class KDiffusionSampler: self.model_wrap_cfg.init_latent = x - samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, xi, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)) + samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, xi, extra_args={ + 'cond': conditioning, + 'image_cond': image_conditioning, + 'uncond': unconditional_conditioning, + 'cond_scale': p.cfg_scale + }, disable=False, callback=self.callback_state, **extra_params_kwargs)) return samples - def sample(self, p, x, conditioning, unconditional_conditioning, steps=None): + def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning = None): steps = steps or p.steps if 
p.sampler_noise_scheduler_override: @@ -414,7 +435,12 @@ class KDiffusionSampler: else: extra_params_kwargs['sigmas'] = sigmas - samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)) + samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={ + 'cond': conditioning, + 'image_cond': image_conditioning, + 'uncond': unconditional_conditioning, + 'cond_scale': p.cfg_scale + }, disable=False, callback=self.callback_state, **extra_params_kwargs)) return samples -- cgit v1.2.3 From dde9f960727bfe151d418e43685a2881cf580a17 Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Wed, 19 Oct 2022 14:14:24 -0700 Subject: added support for ddim img2img --- modules/sd_samplers.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'modules/sd_samplers.py') diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 9d3cf289..d270e4df 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -208,6 +208,12 @@ class VanillaStableDiffusionSampler: self.init_latent = x self.step = 0 + # Wrap the conditioning models with additional image conditioning for inpainting model + if image_conditioning is not None: + conditioning = {"c_concat": [image_conditioning], "c_crossattn": [conditioning]} + unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]} + + samples = self.launch_sampling(steps, lambda: self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning)) return samples -- cgit v1.2.3 From c418467c03db916c3e5312e6ac4a67365e196dbd Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Wed, 19 Oct 2022 15:09:43 -0700 Subject: Don't compute latent mask if were not using it. Also added support for fixed highres_fix generation. --- modules/processing.py | 72 +++++++++++++++++++++++++++++++------------------- modules/sd_samplers.py | 4 +++ 2 files changed, 49 insertions(+), 27 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/processing.py b/modules/processing.py index a6c308f9..684e5833 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -541,12 +541,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f - def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): - self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) - - if not self.enable_hr: - x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - + def create_dummy_mask(self, x): + if self.sampler.conditioning_key in {'hybrid', 'concat'}: # The "masked-image" in this case will just be all zeros since the entire image is masked. 
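Throughout these txt2img hunks the fake all-ones mask is prepended with `torch.nn.functional.pad`. The six-element pad tuple addresses the last three dimensions in (W, W, H, H, C, C) order, so `(0, 0, 0, 0, 1, 0)` adds exactly one leading channel. A quick self-contained check:

import torch
import torch.nn.functional as F

latent = torch.zeros(2, 4, 8, 8)  # hypothetical encoded all-black "masked image"
cond = F.pad(latent, (0, 0, 0, 0, 1, 0), value=1.0)

print(cond.shape)           # torch.Size([2, 5, 8, 8])
print(cond[:, 0].unique())  # tensor([1.]) -- the "everything is masked" channel
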
image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device) image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) @@ -555,11 +551,23 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) image_conditioning = image_conditioning.to(x.dtype) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=image_conditioning) + else: + # Dummy zero conditioning if we're not using inpainting model. + # Still takes up a bit of memory, but no encoder call. + image_conditioning = torch.zeros(x.shape[0], 5, x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device) + + return image_conditioning + + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) + + if not self.enable_hr: + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x)) return samples x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x)) samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2] @@ -596,7 +604,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): x = None devices.torch_gc() - samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps) + samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps, image_conditioning=self.create_dummy_mask(samples)) return samples @@ -723,26 +731,36 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask - if self.image_mask is not None: - conditioning_mask = np.array(self.image_mask.convert("L")) - conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 - conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + conditioning_key = self.sampler.conditioning_key - # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 - conditioning_mask = torch.round(conditioning_mask) + if conditioning_key in {'hybrid', 'concat'}: + if self.image_mask is not None: + conditioning_mask = np.array(self.image_mask.convert("L")) + conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 + conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + + # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 + conditioning_mask = torch.round(conditioning_mask) + else: + conditioning_mask = 
torch.ones(1, 1, *image.shape[-2:]) + + # Create another latent image, this time with a masked version of the original input. + conditioning_mask = conditioning_mask.to(image.device) + conditioning_image = image * (1.0 - conditioning_mask) + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) + + # Create the concatenated conditioning tensor to be fed to `c_concat` + conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:]) + conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1) + self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1) + self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) else: - conditioning_mask = torch.ones(1, 1, *image.shape[-2:]) - - # Create another latent image, this time with a masked version of the original input. - conditioning_mask = conditioning_mask.to(image.device) - conditioning_image = image * (1.0 - conditioning_mask) - conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) - - # Create the concatenated conditioning tensor to be fed to `c_concat` - conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:]) - conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1) - self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1) - self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) + self.image_conditioning = torch.zeros( + self.init_latent.shape[0], 5, self.init_latent.shape[-2], self.init_latent.shape[-1], + dtype=self.init_latent.dtype, + device=self.init_latent.device + ) + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index d270e4df..c21be26e 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -117,6 +117,8 @@ class VanillaStableDiffusionSampler: self.config = None self.last_latent = None + self.conditioning_key = sd_model.model.conditioning_key + def number_of_needed_noises(self, p): return 0 @@ -328,6 +330,8 @@ class KDiffusionSampler: self.config = None self.last_latent = None + self.conditioning_key = sd_model.model.conditioning_key + def callback_state(self, d): step = d['i'] latent = d["denoised"] -- cgit v1.2.3 From 92a17a7a4a13fceb3c3e25a2e854b2a7dd6eb5df Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Thu, 20 Oct 2022 09:45:03 -0700 Subject: Made dummy latents smaller. Minor code cleanups --- modules/processing.py | 7 ++++--- modules/sd_samplers.py | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/processing.py b/modules/processing.py index 3caac25e..539cde38 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -557,7 +557,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): else: # Dummy zero conditioning if we're not using inpainting model. # Still takes up a bit of memory, but no encoder call. 
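A non-hybrid model ignores `c_concat` entirely, so only the placeholder's batch dimension is ever consulted; that is why the hunk below can shrink it to a 1x1 spatial size. A sketch of the saving, with a hypothetical batch:

import torch

batch = 8  # hypothetical batch size; 64x64 is a typical latent resolution

full = torch.zeros(batch, 5, 64, 64, dtype=torch.float16)
tiny = torch.zeros(batch, 5, 1, 1, dtype=torch.float16)

# Both behave identically for a non-inpainting model, but the footprint
# differs by a factor of 64 * 64 = 4096:
print(full.element_size() * full.nelement())  # 327680 bytes
print(tiny.element_size() * tiny.nelement())  # 80 bytes
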
- image_conditioning = torch.zeros(x.shape[0], 5, x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device) + # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size. + image_conditioning = torch.zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device) return image_conditioning @@ -759,8 +760,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) else: self.image_conditioning = torch.zeros( - self.init_latent.shape[0], 5, self.init_latent.shape[-2], self.init_latent.shape[-1], - dtype=self.init_latent.dtype, + self.init_latent.shape[0], 5, 1, 1, + dtype=self.init_latent.dtype, device=self.init_latent.device ) diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index c21be26e..cc682593 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -138,7 +138,7 @@ class VanillaStableDiffusionSampler: if self.stop_at is not None and self.step > self.stop_at: raise InterruptedException - # Have to unwrap the inpainting conditioning here to perform pre-preocessing + # Have to unwrap the inpainting conditioning here to perform pre-processing image_conditioning = None if isinstance(cond, dict): image_conditioning = cond["c_concat"][0] @@ -146,7 +146,7 @@ class VanillaStableDiffusionSampler: unconditional_conditioning = unconditional_conditioning["c_crossattn"][0] conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) - unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) + unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers' cond = tensor @@ -165,6 +165,8 @@ class VanillaStableDiffusionSampler: img_orig = self.sampler.model.q_sample(self.init_latent, ts) x_dec = img_orig * self.mask + self.nmask * x_dec + # Wrap the image conditioning back up since the DDIM code can accept the dict directly. + # Note that they need to be lists because it just concatenates them later. 
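That comment refers to ldm's `DiffusionWrapper.forward`, which channel-concatenates the `c_concat` list in front of the UNet call. Paraphrased for illustration (approximate, not the verbatim CompVis code):

import torch

def hybrid_forward(unet, x, t, c_concat, c_crossattn):
    # conditioning_key == 'hybrid': channel-concatenate the image
    # conditioning, sequence-concatenate the text conditioning.
    xc = torch.cat([x] + c_concat, dim=1)  # e.g. 4 + 5 = 9 channels
    cc = torch.cat(c_crossattn, dim=1)
    return unet(xc, t, context=cc)
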
if image_conditioning is not None: cond = {"c_concat": [image_conditioning], "c_crossattn": [cond]} unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]} -- cgit v1.2.3 From d23a46ceaa76af2847f11172f32c92665c268b1b Mon Sep 17 00:00:00 2001 From: Vladimir Repin <32306715+mezotaken@users.noreply.github.com> Date: Thu, 20 Oct 2022 23:49:14 +0300 Subject: Different approach to skip/interrupt with highres fix --- modules/processing.py | 4 +++- modules/sd_samplers.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/processing.py b/modules/processing.py index 6324ca91..bcb0c32c 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -587,7 +587,9 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): x = None devices.torch_gc() - return self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps) or samples + samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps) + + return samples class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index b58e810b..7ff77c01 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -196,6 +196,7 @@ class VanillaStableDiffusionSampler: x1 = self.sampler.stochastic_encode(x, torch.tensor([t_enc] * int(x.shape[0])).to(shared.device), noise=noise) self.init_latent = x + self.last_latent = x self.step = 0 samples = self.launch_sampling(steps, lambda: self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning)) @@ -206,6 +207,7 @@ class VanillaStableDiffusionSampler: self.initialize(p) self.init_latent = None + self.last_latent = x self.step = 0 steps = steps or p.steps @@ -388,6 +390,7 @@ class KDiffusionSampler: extra_params_kwargs['sigmas'] = sigma_sched self.model_wrap_cfg.init_latent = x + self.last_latent = x samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, xi, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)) @@ -414,6 +417,7 @@ class KDiffusionSampler: else: extra_params_kwargs['sigmas'] = sigmas + self.last_latent = x samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)) return samples -- cgit v1.2.3 From 4fdb53c1e9962507fc8336dad9a0fabfe6c418c0 Mon Sep 17 00:00:00 2001 From: Unnoen Date: Wed, 19 Oct 2022 21:38:10 +1100 Subject: Generate grid preview for progress image --- modules/sd_samplers.py | 26 +++++++++++++++++++++++++- modules/shared.py | 1 + modules/ui.py | 5 ++++- 3 files changed, 30 insertions(+), 2 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index f58a29b9..74a480e5 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -7,7 +7,7 @@ import inspect import k_diffusion.sampling import ldm.models.diffusion.ddim import ldm.models.diffusion.plms -from modules import prompt_parser, devices, processing +from modules import prompt_parser, devices, processing, images from modules.shared import opts, cmd_opts, state import 
modules.shared as shared @@ -89,6 +89,30 @@ def sample_to_image(samples): x_sample = x_sample.astype(np.uint8) return Image.fromarray(x_sample) +def samples_to_image_grid(samples): + progress_images = [] + for i in range(len(samples)): + # Decode the samples individually to reduce VRAM usage at the cost of a bit of speed. + x_sample = processing.decode_first_stage(shared.sd_model, samples[i:i+1])[0] + x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) + x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + progress_images.append(Image.fromarray(x_sample)) + + return images.image_grid(progress_images) + +def samples_to_image_grid_combined(samples): + progress_images = [] + # Decode all samples at once to increase speed at the cost of VRAM usage. + x_samples = processing.decode_first_stage(shared.sd_model, samples) + x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0) + + for x_sample in x_samples: + x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + progress_images.append(Image.fromarray(x_sample)) + + return images.image_grid(progress_images) def store_latent(decoded): state.current_latent = decoded diff --git a/modules/shared.py b/modules/shared.py index d9cb65ef..95d6e225 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -294,6 +294,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"), options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, "Show progressbar"), "show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. Set 0 to disable.", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}), + "progress_decode_combined": OptionInfo(False, "Decode all progress images at once. 
(Slighty speeds up progress generation but consumes significantly more VRAM with large batches.)"), "return_grid": OptionInfo(True, "Show grid in results for web"), "do_not_show_images": OptionInfo(False, "Do not show any images in results for web"), "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"), diff --git a/modules/ui.py b/modules/ui.py index 56c233ab..de0abc7e 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -318,7 +318,10 @@ def check_progress_call(id_part): if shared.parallel_processing_allowed: if shared.state.sampling_step - shared.state.current_image_sampling_step >= opts.show_progress_every_n_steps and shared.state.current_latent is not None: - shared.state.current_image = modules.sd_samplers.sample_to_image(shared.state.current_latent) + if opts.progress_decode_combined: + shared.state.current_image = modules.sd_samplers.samples_to_image_grid_combined(shared.state.current_latent) + else: + shared.state.current_image = modules.sd_samplers.samples_to_image_grid(shared.state.current_latent) shared.state.current_image_sampling_step = shared.state.sampling_step image = shared.state.current_image -- cgit v1.2.3 From d213d6ca6f90094cb45c11e2f3cb37d25a8d1f94 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 22 Oct 2022 20:48:13 +0300 Subject: removed the option to use 2x more memory when generating previews added an option to always only show one image in previews removed duplicate code --- modules/sd_samplers.py | 35 ++++++++++------------------------- modules/shared.py | 2 +- modules/ui.py | 6 +++--- 3 files changed, 14 insertions(+), 29 deletions(-) (limited to 'modules/sd_samplers.py') diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 74a480e5..0b408a70 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -71,6 +71,7 @@ sampler_extra_params = { 'sample_dpm_2': ['s_churn', 's_tmin', 's_tmax', 's_noise'], } + def setup_img2img_steps(p, steps=None): if opts.img2img_fix_steps or steps is not None: steps = int((steps or p.steps) / min(p.denoising_strength, 0.999)) if p.denoising_strength > 0 else 0 @@ -82,37 +83,21 @@ def setup_img2img_steps(p, steps=None): return steps, t_enc -def sample_to_image(samples): - x_sample = processing.decode_first_stage(shared.sd_model, samples[0:1])[0] +def single_sample_to_image(sample): + x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) x_sample = x_sample.astype(np.uint8) return Image.fromarray(x_sample) + +def sample_to_image(samples): + return single_sample_to_image(samples[0]) + + def samples_to_image_grid(samples): - progress_images = [] - for i in range(len(samples)): - # Decode the samples individually to reduce VRAM usage at the cost of a bit of speed. - x_sample = processing.decode_first_stage(shared.sd_model, samples[i:i+1])[0] - x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) - x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) - x_sample = x_sample.astype(np.uint8) - progress_images.append(Image.fromarray(x_sample)) - - return images.image_grid(progress_images) - -def samples_to_image_grid_combined(samples): - progress_images = [] - # Decode all samples at once to increase speed at the cost of VRAM usage. 
- x_samples = processing.decode_first_stage(shared.sd_model, samples) - x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0) - - for x_sample in x_samples: - x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) - x_sample = x_sample.astype(np.uint8) - progress_images.append(Image.fromarray(x_sample)) - - return images.image_grid(progress_images) + return images.image_grid([single_sample_to_image(sample) for sample in samples]) + def store_latent(decoded): state.current_latent = decoded diff --git a/modules/shared.py b/modules/shared.py index 95d6e225..25bfc895 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -294,7 +294,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"), options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, "Show progressbar"), "show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. Set 0 to disable.", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}), - "progress_decode_combined": OptionInfo(False, "Decode all progress images at once. (Slighty speeds up progress generation but consumes significantly more VRAM with large batches.)"), + "show_progress_grid": OptionInfo(True, "Show previews of all images generated in a batch as a grid"), "return_grid": OptionInfo(True, "Show grid in results for web"), "do_not_show_images": OptionInfo(False, "Do not show any images in results for web"), "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"), diff --git a/modules/ui.py b/modules/ui.py index de0abc7e..ffa14cac 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -318,10 +318,10 @@ def check_progress_call(id_part): if shared.parallel_processing_allowed: if shared.state.sampling_step - shared.state.current_image_sampling_step >= opts.show_progress_every_n_steps and shared.state.current_latent is not None: - if opts.progress_decode_combined: - shared.state.current_image = modules.sd_samplers.samples_to_image_grid_combined(shared.state.current_latent) - else: + if opts.show_progress_grid: shared.state.current_image = modules.sd_samplers.samples_to_image_grid(shared.state.current_latent) + else: + shared.state.current_image = modules.sd_samplers.sample_to_image(shared.state.current_latent) shared.state.current_image_sampling_step = shared.state.sampling_step image = shared.state.current_image -- cgit v1.2.3
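For completeness, the classifier-free-guidance batching that `CFGDenoiser.forward` performs after these patches, reduced to the single-prompt case (no AND-composition repeats; tensors and model are hypothetical):

import torch

def cfg_denoise(inner_model, x, sigma, text_cond, text_uncond, image_cond, cond_scale):
    # Run the conditional and unconditional halves in one batched UNet call,
    # sharing the same image conditioning, then blend the two predictions.
    x_in = torch.cat([x, x])
    sigma_in = torch.cat([sigma, sigma])
    image_cond_in = torch.cat([image_cond, image_cond])
    cond_in = torch.cat([text_cond, text_uncond])

    x_out = inner_model(x_in, sigma_in,
                        cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]})

    denoised_cond, denoised_uncond = x_out.chunk(2)
    return denoised_uncond + (denoised_cond - denoised_uncond) * cond_scale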