diff options
Diffstat (limited to 'modules/processing.py')
-rw-r--r-- | modules/processing.py | 1003 |
1 files changed, 623 insertions, 380 deletions
diff --git a/modules/processing.py b/modules/processing.py index b0992ee1..846e4796 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1,9 +1,11 @@ +from __future__ import annotations
import json
import logging
import math
import os
import sys
import hashlib
+from dataclasses import dataclass, field
import torch
import numpy as np
@@ -11,11 +13,13 @@ from PIL import Image, ImageOps import random
import cv2
from skimage import exposure
-from typing import Any, Dict, List
+from typing import Any
import modules.sd_hijack
-from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors
+from modules import devices, prompt_parser, masking, sd_samplers, lowvram, infotext, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors, rng
+from modules.rng import slerp # noqa: F401
from modules.sd_hijack import model_hijack
+from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
import modules.paths as paths
@@ -55,21 +59,25 @@ def apply_color_correction(correction, original_image): image = blendLayers(image, original_image, BlendType.LUMINOSITY)
+ return image.convert('RGB')
+
+
+def uncrop(image, dest_size, paste_loc):
+ x, y, w, h = paste_loc
+ base_image = Image.new('RGBA', dest_size)
+ image = images.resize_image(1, image, w, h)
+ base_image.paste(image, (x, y))
+ image = base_image
+
return image
-def apply_overlay(image, paste_loc, index, overlays):
- if overlays is None or index >= len(overlays):
+def apply_overlay(image, paste_loc, overlay):
+ if overlay is None:
return image
- overlay = overlays[index]
-
if paste_loc is not None:
- x, y, w, h = paste_loc
- base_image = Image.new('RGBA', (overlay.width, overlay.height))
- image = images.resize_image(1, image, w, h)
- base_image.paste(image, (x, y))
- image = base_image
+ image = uncrop(image, (overlay.width, overlay.height), paste_loc)
image = image.convert('RGBA')
image.alpha_composite(overlay)
@@ -77,13 +85,22 @@ def apply_overlay(image, paste_loc, index, overlays): return image
+def create_binary_mask(image, round=True):
+ if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255):
+ if round:
+ image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
+ else:
+ image = image.split()[-1].convert("L")
+ else:
+ image = image.convert('L')
+ return image
def txt2img_image_conditioning(sd_model, x, width, height):
if sd_model.model.conditioning_key in {'hybrid', 'concat'}: # Inpainting models
- # The "masked-image" in this case will just be all zeros since the entire image is masked.
- image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
- image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
+ # The "masked-image" in this case will just be all 0.5 since the entire image is masked.
+ image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5
+ image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method))
# Add the fake full 1s mask to the first dimension.
image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
@@ -96,100 +113,186 @@ def txt2img_image_conditioning(sd_model, x, width, height): return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device)
else:
+ sd = sd_model.model.state_dict()
+ diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
+ if diffusion_model_input is not None:
+ if diffusion_model_input.shape[1] == 9:
+ # The "masked-image" in this case will just be all 0.5 since the entire image is masked.
+ image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5
+ image_conditioning = images_tensor_to_samples(image_conditioning,
+ approximation_indexes.get(opts.sd_vae_encode_method))
+
+ # Add the fake full 1s mask to the first dimension.
+ image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+ image_conditioning = image_conditioning.to(x.dtype)
+
+ return image_conditioning
+
# Dummy zero conditioning if we're not using inpainting or unclip models.
# Still takes up a bit of memory, but no encoder call.
# Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
return x.new_zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
+@dataclass(repr=False)
class StableDiffusionProcessing:
- """
- The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing
- """
+ sd_model: object = None
+ outpath_samples: str = None
+ outpath_grids: str = None
+ prompt: str = ""
+ prompt_for_display: str = None
+ negative_prompt: str = ""
+ styles: list[str] = None
+ seed: int = -1
+ subseed: int = -1
+ subseed_strength: float = 0
+ seed_resize_from_h: int = -1
+ seed_resize_from_w: int = -1
+ seed_enable_extras: bool = True
+ sampler_name: str = None
+ batch_size: int = 1
+ n_iter: int = 1
+ steps: int = 50
+ cfg_scale: float = 7.0
+ width: int = 512
+ height: int = 512
+ restore_faces: bool = None
+ tiling: bool = None
+ do_not_save_samples: bool = False
+ do_not_save_grid: bool = False
+ extra_generation_params: dict[str, Any] = None
+ overlay_images: list = None
+ eta: float = None
+ do_not_reload_embeddings: bool = False
+ denoising_strength: float = None
+ ddim_discretize: str = None
+ s_min_uncond: float = None
+ s_churn: float = None
+ s_tmax: float = None
+ s_tmin: float = None
+ s_noise: float = None
+ override_settings: dict[str, Any] = None
+ override_settings_restore_afterwards: bool = True
+ sampler_index: int = None
+ refiner_checkpoint: str = None
+ refiner_switch_at: float = None
+ token_merging_ratio = 0
+ token_merging_ratio_hr = 0
+ disable_extra_networks: bool = False
+
+ scripts_value: scripts.ScriptRunner = field(default=None, init=False)
+ script_args_value: list = field(default=None, init=False)
+ scripts_setup_complete: bool = field(default=False, init=False)
+
cached_uc = [None, None]
cached_c = [None, None]
- def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_min_uncond: float = 0.0, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None):
- if sampler_index is not None:
+ comments: dict = None
+ sampler: sd_samplers_common.Sampler | None = field(default=None, init=False)
+ is_using_inpainting_conditioning: bool = field(default=False, init=False)
+ paste_to: tuple | None = field(default=None, init=False)
+
+ is_hr_pass: bool = field(default=False, init=False)
+
+ c: tuple = field(default=None, init=False)
+ uc: tuple = field(default=None, init=False)
+
+ rng: rng.ImageRNG | None = field(default=None, init=False)
+ step_multiplier: int = field(default=1, init=False)
+ color_corrections: list = field(default=None, init=False)
+
+ all_prompts: list = field(default=None, init=False)
+ all_negative_prompts: list = field(default=None, init=False)
+ all_seeds: list = field(default=None, init=False)
+ all_subseeds: list = field(default=None, init=False)
+ iteration: int = field(default=0, init=False)
+ main_prompt: str = field(default=None, init=False)
+ main_negative_prompt: str = field(default=None, init=False)
+
+ prompts: list = field(default=None, init=False)
+ negative_prompts: list = field(default=None, init=False)
+ seeds: list = field(default=None, init=False)
+ subseeds: list = field(default=None, init=False)
+ extra_network_data: dict = field(default=None, init=False)
+
+ user: str = field(default=None, init=False)
+
+ sd_model_name: str = field(default=None, init=False)
+ sd_model_hash: str = field(default=None, init=False)
+ sd_vae_name: str = field(default=None, init=False)
+ sd_vae_hash: str = field(default=None, init=False)
+
+ is_api: bool = field(default=False, init=False)
+
+ def __post_init__(self):
+ if self.sampler_index is not None:
print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)
- self.outpath_samples: str = outpath_samples
- self.outpath_grids: str = outpath_grids
- self.prompt: str = prompt
- self.prompt_for_display: str = None
- self.negative_prompt: str = (negative_prompt or "")
- self.styles: list = styles or []
- self.seed: int = seed
- self.subseed: int = subseed
- self.subseed_strength: float = subseed_strength
- self.seed_resize_from_h: int = seed_resize_from_h
- self.seed_resize_from_w: int = seed_resize_from_w
- self.sampler_name: str = sampler_name
- self.batch_size: int = batch_size
- self.n_iter: int = n_iter
- self.steps: int = steps
- self.cfg_scale: float = cfg_scale
- self.width: int = width
- self.height: int = height
- self.restore_faces: bool = restore_faces
- self.tiling: bool = tiling
- self.do_not_save_samples: bool = do_not_save_samples
- self.do_not_save_grid: bool = do_not_save_grid
- self.extra_generation_params: dict = extra_generation_params or {}
- self.overlay_images = overlay_images
- self.eta = eta
- self.do_not_reload_embeddings = do_not_reload_embeddings
- self.paste_to = None
- self.color_corrections = None
- self.denoising_strength: float = denoising_strength
+ self.comments = {}
+
+ if self.styles is None:
+ self.styles = []
+
self.sampler_noise_scheduler_override = None
- self.ddim_discretize = ddim_discretize or opts.ddim_discretize
- self.s_min_uncond = s_min_uncond or opts.s_min_uncond
- self.s_churn = s_churn or opts.s_churn
- self.s_tmin = s_tmin or opts.s_tmin
- self.s_tmax = s_tmax or float('inf') # not representable as a standard ui option
- self.s_noise = s_noise or opts.s_noise
- self.override_settings = {k: v for k, v in (override_settings or {}).items() if k not in shared.restricted_opts}
- self.override_settings_restore_afterwards = override_settings_restore_afterwards
- self.is_using_inpainting_conditioning = False
- self.disable_extra_networks = False
- self.token_merging_ratio = 0
- self.token_merging_ratio_hr = 0
-
- if not seed_enable_extras:
+ self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond
+ self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn
+ self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin
+ self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf')
+ self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise
+
+ self.extra_generation_params = self.extra_generation_params or {}
+ self.override_settings = self.override_settings or {}
+ self.script_args = self.script_args or {}
+
+ self.refiner_checkpoint_info = None
+
+ if not self.seed_enable_extras:
self.subseed = -1
self.subseed_strength = 0
self.seed_resize_from_h = 0
self.seed_resize_from_w = 0
- self.scripts = None
- self.script_args = script_args
- self.all_prompts = None
- self.all_negative_prompts = None
- self.all_seeds = None
- self.all_subseeds = None
- self.iteration = 0
- self.is_hr_pass = False
- self.sampler = None
-
- self.prompts = None
- self.negative_prompts = None
- self.extra_network_data = None
- self.seeds = None
- self.subseeds = None
-
- self.step_multiplier = 1
self.cached_uc = StableDiffusionProcessing.cached_uc
self.cached_c = StableDiffusionProcessing.cached_c
- self.uc = None
- self.c = None
-
- self.user = None
@property
def sd_model(self):
return shared.sd_model
+ @sd_model.setter
+ def sd_model(self, value):
+ pass
+
+ @property
+ def scripts(self):
+ return self.scripts_value
+
+ @scripts.setter
+ def scripts(self, value):
+ self.scripts_value = value
+
+ if self.scripts_value and self.script_args_value and not self.scripts_setup_complete:
+ self.setup_scripts()
+
+ @property
+ def script_args(self):
+ return self.script_args_value
+
+ @script_args.setter
+ def script_args(self, value):
+ self.script_args_value = value
+
+ if self.scripts_value and self.script_args_value and not self.scripts_setup_complete:
+ self.setup_scripts()
+
+ def setup_scripts(self):
+ self.scripts_setup_complete = True
+
+ self.scripts.setup_scrips(self, is_ui=not self.is_api)
+
+ def comment(self, text):
+ self.comments[text] = 1
+
def txt2img_image_conditioning(self, x, width=None, height=None):
self.is_using_inpainting_conditioning = self.sd_model.model.conditioning_key in {'hybrid', 'concat'}
@@ -202,7 +305,7 @@ class StableDiffusionProcessing: midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device)
midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size)
- conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
+ conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
conditioning = torch.nn.functional.interpolate(
self.sd_model.depth_model(midas_in),
size=conditioning_image.shape[2:],
@@ -215,7 +318,7 @@ class StableDiffusionProcessing: return conditioning
def edit_image_conditioning(self, source_image):
- conditioning_image = self.sd_model.encode_first_stage(source_image).mode()
+ conditioning_image = shared.sd_model.encode_first_stage(source_image).mode()
return conditioning_image
@@ -227,7 +330,7 @@ class StableDiffusionProcessing: c_adm = torch.cat((c_adm, noise_level_emb), 1)
return c_adm
- def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None):
+ def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
self.is_using_inpainting_conditioning = True
# Handle the different mask inputs
@@ -239,8 +342,10 @@ class StableDiffusionProcessing: conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
- # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
- conditioning_mask = torch.round(conditioning_mask)
+ if round_image_mask:
+ # Caller is requesting a discretized mask as input, so we round to either 1.0 or 0.0
+ conditioning_mask = torch.round(conditioning_mask)
+
else:
conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:])
@@ -264,7 +369,7 @@ class StableDiffusionProcessing: return image_conditioning
- def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
+ def img2img_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
source_image = devices.cond_cast_float(source_image)
# HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
@@ -276,11 +381,17 @@ class StableDiffusionProcessing: return self.edit_image_conditioning(source_image)
if self.sampler.conditioning_key in {'hybrid', 'concat'}:
- return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+ return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask, round_image_mask=round_image_mask)
if self.sampler.conditioning_key == "crossattn-adm":
return self.unclip_image_conditioning(source_image)
+ sd = self.sampler.model_wrap.inner_model.model.state_dict()
+ diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None)
+ if diffusion_model_input is not None:
+ if diffusion_model_input.shape[1] == 9:
+ return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+
# Dummy zero conditioning if we're not using inpainting or depth model.
return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
@@ -294,7 +405,7 @@ class StableDiffusionProcessing: self.sampler = None
self.c = None
self.uc = None
- if not opts.experimental_persistent_cond_cache:
+ if not opts.persistent_cond_cache:
StableDiffusionProcessing.cached_c = [None, None]
StableDiffusionProcessing.cached_uc = [None, None]
@@ -305,35 +416,35 @@ class StableDiffusionProcessing: return self.token_merging_ratio or opts.token_merging_ratio
def setup_prompts(self):
- if type(self.prompt) == list:
+ if isinstance(self.prompt,list):
self.all_prompts = self.prompt
+ elif isinstance(self.negative_prompt, list):
+ self.all_prompts = [self.prompt] * len(self.negative_prompt)
else:
self.all_prompts = self.batch_size * self.n_iter * [self.prompt]
- if type(self.negative_prompt) == list:
+ if isinstance(self.negative_prompt, list):
self.all_negative_prompts = self.negative_prompt
else:
- self.all_negative_prompts = self.batch_size * self.n_iter * [self.negative_prompt]
+ self.all_negative_prompts = [self.negative_prompt] * len(self.all_prompts)
+
+ if len(self.all_prompts) != len(self.all_negative_prompts):
+ raise RuntimeError(f"Received a different number of prompts ({len(self.all_prompts)}) and negative prompts ({len(self.all_negative_prompts)})")
self.all_prompts = [shared.prompt_styles.apply_styles_to_prompt(x, self.styles) for x in self.all_prompts]
self.all_negative_prompts = [shared.prompt_styles.apply_negative_styles_to_prompt(x, self.styles) for x in self.all_negative_prompts]
- def get_conds_with_caching(self, function, required_prompts, steps, caches, extra_network_data):
- """
- Returns the result of calling function(shared.sd_model, required_prompts, steps)
- using a cache to store the result if the same arguments have been used before.
-
- cache is an array containing two elements. The first element is a tuple
- representing the previously used arguments, or None if no arguments
- have been used before. The second element is where the previously
- computed result is stored.
+ self.main_prompt = self.all_prompts[0]
+ self.main_negative_prompt = self.all_negative_prompts[0]
- caches is a list with items described above.
- """
+ def cached_params(self, required_prompts, steps, extra_network_data, hires_steps=None, use_old_scheduling=False):
+ """Returns parameters that invalidate the cond cache if changed"""
- cached_params = (
+ return (
required_prompts,
steps,
+ hires_steps,
+ use_old_scheduling,
opts.CLIP_stop_at_last_layers,
shared.sd_model.sd_checkpoint_info,
extra_network_data,
@@ -341,8 +452,31 @@ class StableDiffusionProcessing: opts.sdxl_crop_top,
self.width,
self.height,
+ opts.fp8_storage,
+ opts.cache_fp16_weight,
)
+ def get_conds_with_caching(self, function, required_prompts, steps, caches, extra_network_data, hires_steps=None):
+ """
+ Returns the result of calling function(shared.sd_model, required_prompts, steps)
+ using a cache to store the result if the same arguments have been used before.
+
+ cache is an array containing two elements. The first element is a tuple
+ representing the previously used arguments, or None if no arguments
+ have been used before. The second element is where the previously
+ computed result is stored.
+
+ caches is a list with items described above.
+ """
+
+ if shared.opts.use_old_scheduling:
+ old_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(required_prompts, steps, hires_steps, False)
+ new_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(required_prompts, steps, hires_steps, True)
+ if old_schedules != new_schedules:
+ self.extra_generation_params["Old prompt editing timelines"] = True
+
+ cached_params = self.cached_params(required_prompts, steps, extra_network_data, hires_steps, shared.opts.use_old_scheduling)
+
for cache in caches:
if cache[0] is not None and cached_params == cache[0]:
return cache[1]
@@ -350,7 +484,7 @@ class StableDiffusionProcessing: cache = caches[0]
with devices.autocast():
- cache[1] = function(shared.sd_model, required_prompts, steps)
+ cache[1] = function(shared.sd_model, required_prompts, steps, hires_steps, shared.opts.use_old_scheduling)
cache[0] = cached_params
return cache[1]
@@ -360,13 +494,23 @@ class StableDiffusionProcessing: negative_prompts = prompt_parser.SdConditioning(self.negative_prompts, width=self.width, height=self.height, is_negative_prompt=True)
sampler_config = sd_samplers.find_sampler_config(self.sampler_name)
- self.step_multiplier = 2 if sampler_config and sampler_config.options.get("second_order", False) else 1
- self.uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, negative_prompts, self.steps * self.step_multiplier, [self.cached_uc], self.extra_network_data)
- self.c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, prompts, self.steps * self.step_multiplier, [self.cached_c], self.extra_network_data)
+ total_steps = sampler_config.total_steps(self.steps) if sampler_config else self.steps
+ self.step_multiplier = total_steps // self.steps
+ self.firstpass_steps = total_steps
+
+ self.uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, negative_prompts, total_steps, [self.cached_uc], self.extra_network_data)
+ self.c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, prompts, total_steps, [self.cached_c], self.extra_network_data)
+
+ def get_conds(self):
+ return self.c, self.uc
def parse_extra_network_prompts(self):
self.prompts, self.extra_network_data = extra_networks.parse_prompts(self.prompts)
+ def save_samples(self) -> bool:
+ """Returns whether generated images need to be written to disk"""
+ return opts.samples_save and not self.do_not_save_samples and (opts.save_incomplete_images or not state.interrupted and not state.skipped)
+
class Processed:
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
@@ -377,7 +521,7 @@ class Processed: self.subseed = subseed
self.subseed_strength = p.subseed_strength
self.info = info
- self.comments = comments
+ self.comments = "".join(f"{comment}\n" for comment in p.comments)
self.width = p.width
self.height = p.height
self.sampler_name = p.sampler_name
@@ -387,7 +531,10 @@ class Processed: self.batch_size = p.batch_size
self.restore_faces = p.restore_faces
self.face_restoration_model = opts.face_restoration_model if p.restore_faces else None
- self.sd_model_hash = shared.sd_model.sd_model_hash
+ self.sd_model_name = p.sd_model_name
+ self.sd_model_hash = p.sd_model_hash
+ self.sd_vae_name = p.sd_vae_name
+ self.sd_vae_hash = p.sd_vae_hash
self.seed_resize_from_w = p.seed_resize_from_w
self.seed_resize_from_h = p.seed_resize_from_h
self.denoising_strength = getattr(p, 'denoising_strength', None)
@@ -407,10 +554,10 @@ class Processed: self.s_noise = p.s_noise
self.s_min_uncond = p.s_min_uncond
self.sampler_noise_scheduler_override = p.sampler_noise_scheduler_override
- self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0]
- self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0]
- self.seed = int(self.seed if type(self.seed) != list else self.seed[0]) if self.seed is not None else -1
- self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1
+ self.prompt = self.prompt if not isinstance(self.prompt, list) else self.prompt[0]
+ self.negative_prompt = self.negative_prompt if not isinstance(self.negative_prompt, list) else self.negative_prompt[0]
+ self.seed = int(self.seed if not isinstance(self.seed, list) else self.seed[0]) if self.seed is not None else -1
+ self.subseed = int(self.subseed if not isinstance(self.subseed, list) else self.subseed[0]) if self.subseed is not None else -1
self.is_using_inpainting_conditioning = p.is_using_inpainting_conditioning
self.all_prompts = all_prompts or p.all_prompts or [self.prompt]
@@ -418,6 +565,7 @@ class Processed: self.all_seeds = all_seeds or p.all_seeds or [self.seed]
self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
self.infotexts = infotexts or [info]
+ self.version = program_version()
def js(self):
obj = {
@@ -438,7 +586,10 @@ class Processed: "batch_size": self.batch_size,
"restore_faces": self.restore_faces,
"face_restoration_model": self.face_restoration_model,
+ "sd_model_name": self.sd_model_name,
"sd_model_hash": self.sd_model_hash,
+ "sd_vae_name": self.sd_vae_name,
+ "sd_vae_hash": self.sd_vae_hash,
"seed_resize_from_w": self.seed_resize_from_w,
"seed_resize_from_h": self.seed_resize_from_h,
"denoising_strength": self.denoising_strength,
@@ -449,6 +600,7 @@ class Processed: "job_timestamp": self.job_timestamp,
"clip_skip": self.clip_skip,
"is_using_inpainting_conditioning": self.is_using_inpainting_conditioning,
+ "version": self.version,
}
return json.dumps(obj)
@@ -460,86 +612,17 @@ class Processed: return self.token_merging_ratio_hr if for_hr else self.token_merging_ratio
-# from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3
-def slerp(val, low, high):
- low_norm = low/torch.norm(low, dim=1, keepdim=True)
- high_norm = high/torch.norm(high, dim=1, keepdim=True)
- dot = (low_norm*high_norm).sum(1)
-
- if dot.mean() > 0.9995:
- return low * val + high * (1 - val)
-
- omega = torch.acos(dot)
- so = torch.sin(omega)
- res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high
- return res
-
-
def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None):
- eta_noise_seed_delta = opts.eta_noise_seed_delta or 0
- xs = []
-
- # if we have multiple seeds, this means we are working with batch size>1; this then
- # enables the generation of additional tensors with noise that the sampler will use during its processing.
- # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to
- # produce the same images as with two batches [100], [101].
- if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or eta_noise_seed_delta > 0):
- sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))]
- else:
- sampler_noises = None
-
- for i, seed in enumerate(seeds):
- noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
-
- subnoise = None
- if subseeds is not None:
- subseed = 0 if i >= len(subseeds) else subseeds[i]
-
- subnoise = devices.randn(subseed, noise_shape)
-
- # randn results depend on device; gpu and cpu get different results for same seed;
- # the way I see it, it's better to do this on CPU, so that everyone gets same result;
- # but the original script had it like this, so I do not dare change it for now because
- # it will break everyone's seeds.
- noise = devices.randn(seed, noise_shape)
-
- if subnoise is not None:
- noise = slerp(subseed_strength, noise, subnoise)
-
- if noise_shape != shape:
- x = devices.randn(seed, shape)
- dx = (shape[2] - noise_shape[2]) // 2
- dy = (shape[1] - noise_shape[1]) // 2
- w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx
- h = noise_shape[1] if dy >= 0 else noise_shape[1] + 2 * dy
- tx = 0 if dx < 0 else dx
- ty = 0 if dy < 0 else dy
- dx = max(-dx, 0)
- dy = max(-dy, 0)
-
- x[:, ty:ty+h, tx:tx+w] = noise[:, dy:dy+h, dx:dx+w]
- noise = x
+ g = rng.ImageRNG(shape, seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=seed_resize_from_h, seed_resize_from_w=seed_resize_from_w)
+ return g.next()
- if sampler_noises is not None:
- cnt = p.sampler.number_of_needed_noises(p)
- if eta_noise_seed_delta > 0:
- torch.manual_seed(seed + eta_noise_seed_delta)
-
- for j in range(cnt):
- sampler_noises[j].append(devices.randn_without_seed(tuple(noise_shape)))
-
- xs.append(noise)
-
- if sampler_noises is not None:
- p.sampler.sampler_noises = [torch.stack(n).to(shared.device) for n in sampler_noises]
-
- x = torch.stack(xs).to(shared.device)
- return x
+class DecodedSamples(list):
+ already_decoded = True
def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
- samples = []
+ samples = DecodedSamples()
for i in range(batch.shape[0]):
sample = decode_first_stage(model, batch[i:i + 1])[0]
@@ -554,7 +637,7 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False): errors.print_error_explanation(
"A tensor with all NaNs was produced in VAE.\n"
"Web UI will now convert VAE into 32-bit float and retry.\n"
- "To disable this behavior, disable the 'Automaticlly revert VAE to 32-bit floats' setting.\n"
+ "To disable this behavior, disable the 'Automatically revert VAE to 32-bit floats' setting.\n"
"To always start with 32-bit VAE, use --no-half-vae commandline flag."
)
@@ -572,14 +655,16 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False): return samples
-def decode_first_stage(model, x):
- x = model.decode_first_stage(x.to(devices.dtype_vae))
-
- return x
-
-
def get_fixed_seed(seed):
- if seed is None or seed == '' or seed == -1:
+ if seed == '' or seed is None:
+ seed = -1
+ elif isinstance(seed, str):
+ try:
+ seed = int(seed)
+ except Exception:
+ seed = -1
+
+ if seed == -1:
return int(random.randrange(4294967294))
return seed
@@ -622,10 +707,14 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "CFG scale": p.cfg_scale,
"Image CFG scale": getattr(p, 'image_cfg_scale', None),
"Seed": p.all_seeds[0] if use_main_prompt else all_seeds[index],
- "Face restoration": (opts.face_restoration_model if p.restore_faces else None),
+ "Face restoration": opts.face_restoration_model if p.restore_faces else None,
"Size": f"{p.width}x{p.height}",
- "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
- "Model": (None if not opts.add_model_name_to_info else shared.sd_model.sd_checkpoint_info.name_for_extra),
+ "Model hash": p.sd_model_hash if opts.add_model_hash_to_info else None,
+ "Model": p.sd_model_name if opts.add_model_name_to_info else None,
+ "FP8 weight": opts.fp8_storage if devices.fp8 else None,
+ "Cache FP16 weight for LoRA": opts.cache_fp16_weight if devices.fp8 else None,
+ "VAE hash": p.sd_vae_hash if opts.add_vae_hash_to_info else None,
+ "VAE": p.sd_vae_name if opts.add_vae_name_to_info else None,
"Variation seed": (None if p.subseed_strength == 0 else (p.all_subseeds[0] if use_main_prompt else all_subseeds[index])),
"Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
"Seed resize from": (None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
@@ -638,15 +727,16 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Init image hash": getattr(p, 'init_img_hash', None),
"RNG": opts.randn_source if opts.randn_source != "GPU" else None,
"NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond,
+ "Tiling": "True" if p.tiling else None,
**p.extra_generation_params,
"Version": program_version() if opts.add_version_to_infotext else None,
"User": p.user if opts.add_user_name_to_info else None,
}
- generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
+ generation_params_text = ", ".join([k if k == v else f'{k}: {infotext.quote(v)}' for k, v in generation_params.items() if v is not None])
- prompt_text = p.prompt if use_main_prompt else all_prompts[index]
- negative_prompt_text = f"\nNegative prompt: {all_negative_prompts[index]}" if all_negative_prompts[index] else ""
+ prompt_text = p.main_prompt if use_main_prompt else all_prompts[index]
+ negative_prompt_text = f"\nNegative prompt: {p.main_negative_prompt if use_main_prompt else all_negative_prompts[index]}" if all_negative_prompts[index] else ""
return f"{prompt_text}{negative_prompt_text}\n{generation_params_text}".strip()
@@ -655,16 +745,17 @@ def process_images(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None:
p.scripts.before_process(p)
- stored_opts = {k: opts.data[k] for k in p.override_settings.keys()}
+ stored_opts = {k: opts.data[k] if k in opts.data else opts.get_default(k) for k in p.override_settings.keys() if k in opts.data}
try:
# if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint
+ # and if after running refiner, the refiner model is not unloaded - webui swaps back to main model here, if model over is present it will be reloaded afterwards
if sd_models.checkpoint_aliases.get(p.override_settings.get('sd_model_checkpoint')) is None:
p.override_settings.pop('sd_model_checkpoint', None)
sd_models.reload_model_weights()
for k, v in p.override_settings.items():
- setattr(opts, k, v)
+ opts.set(k, v, is_api=True, run_callbacks=False)
if k == 'sd_model_checkpoint':
sd_models.reload_model_weights()
@@ -693,7 +784,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: def process_images_inner(p: StableDiffusionProcessing) -> Processed:
"""this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
- if type(p.prompt) == list:
+ if isinstance(p.prompt, list):
assert(len(p.prompt) > 0)
else:
assert p.prompt is not None
@@ -703,19 +794,33 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: seed = get_fixed_seed(p.seed)
subseed = get_fixed_seed(p.subseed)
+ if p.restore_faces is None:
+ p.restore_faces = opts.face_restoration
+
+ if p.tiling is None:
+ p.tiling = opts.tiling
+
+ if p.refiner_checkpoint not in (None, "", "None", "none"):
+ p.refiner_checkpoint_info = sd_models.get_closet_checkpoint_match(p.refiner_checkpoint)
+ if p.refiner_checkpoint_info is None:
+ raise Exception(f'Could not find checkpoint with name {p.refiner_checkpoint}')
+
+ p.sd_model_name = shared.sd_model.sd_checkpoint_info.name_for_extra
+ p.sd_model_hash = shared.sd_model.sd_model_hash
+ p.sd_vae_name = sd_vae.get_loaded_vae_name()
+ p.sd_vae_hash = sd_vae.get_loaded_vae_hash()
+
modules.sd_hijack.model_hijack.apply_circular(p.tiling)
modules.sd_hijack.model_hijack.clear_comments()
- comments = {}
-
p.setup_prompts()
- if type(seed) == list:
+ if isinstance(seed, list):
p.all_seeds = seed
else:
p.all_seeds = [int(seed) + (x if p.subseed_strength == 0 else 0) for x in range(len(p.all_prompts))]
- if type(subseed) == list:
+ if isinstance(subseed, list):
p.all_subseeds = subseed
else:
p.all_subseeds = [int(subseed) + x for x in range(len(p.all_prompts))]
@@ -728,7 +833,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: |