From 8e7097d06a6a261580d34375c9d2a9e4ffc63ffa Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Wed, 19 Oct 2022 13:47:45 -0700 Subject: Added support for RunwayML inpainting model --- modules/processing.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index bcb0c32c..a6c308f9 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -546,7 +546,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): if not self.enable_hr: x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) + + # The "masked-image" in this case will just be all zeros since the entire image is masked. + image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device) + image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) + + # Add the fake full 1s mask to the first dimension. + image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) + image_conditioning = image_conditioning.to(x.dtype) + + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=image_conditioning) return samples x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) @@ -714,10 +723,31 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask + if self.image_mask is not None: + conditioning_mask = np.array(self.image_mask.convert("L")) + conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 + conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + + # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 + conditioning_mask = torch.round(conditioning_mask) + else: + conditioning_mask = torch.ones(1, 1, *image.shape[-2:]) + + # Create another latent image, this time with a masked version of the original input. + conditioning_mask = conditioning_mask.to(image.device) + conditioning_image = image * (1.0 - conditioning_mask) + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) + + # Create the concatenated conditioning tensor to be fed to `c_concat` + conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:]) + conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1) + self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1) + self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning) + samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) if self.mask is not None: samples = samples * self.nmask + self.init_latent * self.mask -- cgit v1.2.3 From c418467c03db916c3e5312e6ac4a67365e196dbd Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Wed, 19 Oct 2022 15:09:43 -0700 Subject: Don't compute latent mask if were not using it. Also added support for fixed highres_fix generation. --- modules/processing.py | 72 +++++++++++++++++++++++++++++++------------------- modules/sd_samplers.py | 4 +++ 2 files changed, 49 insertions(+), 27 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index a6c308f9..684e5833 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -541,12 +541,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f - def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): - self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) - - if not self.enable_hr: - x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - + def create_dummy_mask(self, x): + if self.sampler.conditioning_key in {'hybrid', 'concat'}: # The "masked-image" in this case will just be all zeros since the entire image is masked. image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device) image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) @@ -555,11 +551,23 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) image_conditioning = image_conditioning.to(x.dtype) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=image_conditioning) + else: + # Dummy zero conditioning if we're not using inpainting model. + # Still takes up a bit of memory, but no encoder call. + image_conditioning = torch.zeros(x.shape[0], 5, x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device) + + return image_conditioning + + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) + + if not self.enable_hr: + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x)) return samples x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x)) samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2] @@ -596,7 +604,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): x = None devices.torch_gc() - samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps) + samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps, image_conditioning=self.create_dummy_mask(samples)) return samples @@ -723,26 +731,36 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask - if self.image_mask is not None: - conditioning_mask = np.array(self.image_mask.convert("L")) - conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 - conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + conditioning_key = self.sampler.conditioning_key - # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 - conditioning_mask = torch.round(conditioning_mask) + if conditioning_key in {'hybrid', 'concat'}: + if self.image_mask is not None: + conditioning_mask = np.array(self.image_mask.convert("L")) + conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 + conditioning_mask = torch.from_numpy(conditioning_mask[None, None]) + + # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0 + conditioning_mask = torch.round(conditioning_mask) + else: + conditioning_mask = torch.ones(1, 1, *image.shape[-2:]) + + # Create another latent image, this time with a masked version of the original input. + conditioning_mask = conditioning_mask.to(image.device) + conditioning_image = image * (1.0 - conditioning_mask) + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) + + # Create the concatenated conditioning tensor to be fed to `c_concat` + conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:]) + conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1) + self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1) + self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) else: - conditioning_mask = torch.ones(1, 1, *image.shape[-2:]) - - # Create another latent image, this time with a masked version of the original input. - conditioning_mask = conditioning_mask.to(image.device) - conditioning_image = image * (1.0 - conditioning_mask) - conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) - - # Create the concatenated conditioning tensor to be fed to `c_concat` - conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:]) - conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1) - self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1) - self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) + self.image_conditioning = torch.zeros( + self.init_latent.shape[0], 5, self.init_latent.shape[-2], self.init_latent.shape[-1], + dtype=self.init_latent.dtype, + device=self.init_latent.device + ) + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index d270e4df..c21be26e 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -117,6 +117,8 @@ class VanillaStableDiffusionSampler: self.config = None self.last_latent = None + self.conditioning_key = sd_model.model.conditioning_key + def number_of_needed_noises(self, p): return 0 @@ -328,6 +330,8 @@ class KDiffusionSampler: self.config = None self.last_latent = None + self.conditioning_key = sd_model.model.conditioning_key + def callback_state(self, d): step = d['i'] latent = d["denoised"] -- cgit v1.2.3 From aa7ff2a1972f3865883e10ba28c5414cdebe8e3b Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Wed, 19 Oct 2022 21:46:13 -0700 Subject: Fixed non-square highres fix generation --- modules/processing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 684e5833..3caac25e 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -541,10 +541,13 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f - def create_dummy_mask(self, x): + def create_dummy_mask(self, x, first_phase: bool = False): if self.sampler.conditioning_key in {'hybrid', 'concat'}: + height = self.firstphase_height if first_phase else self.height + width = self.firstphase_width if first_phase else self.width + # The "masked-image" in this case will just be all zeros since the entire image is masked. - image_conditioning = torch.zeros(x.shape[0], 3, self.height, self.width, device=x.device) + image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device) image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning)) # Add the fake full 1s mask to the first dimension. @@ -567,7 +570,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): return samples x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x)) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, first_phase=True)) samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2] -- cgit v1.2.3 From 92a17a7a4a13fceb3c3e25a2e854b2a7dd6eb5df Mon Sep 17 00:00:00 2001 From: random_thoughtss Date: Thu, 20 Oct 2022 09:45:03 -0700 Subject: Made dummy latents smaller. Minor code cleanups --- modules/processing.py | 7 ++++--- modules/sd_samplers.py | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 3caac25e..539cde38 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -557,7 +557,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): else: # Dummy zero conditioning if we're not using inpainting model. # Still takes up a bit of memory, but no encoder call. - image_conditioning = torch.zeros(x.shape[0], 5, x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device) + # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size. + image_conditioning = torch.zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device) return image_conditioning @@ -759,8 +760,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype) else: self.image_conditioning = torch.zeros( - self.init_latent.shape[0], 5, self.init_latent.shape[-2], self.init_latent.shape[-1], - dtype=self.init_latent.dtype, + self.init_latent.shape[0], 5, 1, 1, + dtype=self.init_latent.dtype, device=self.init_latent.device ) diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index c21be26e..cc682593 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -138,7 +138,7 @@ class VanillaStableDiffusionSampler: if self.stop_at is not None and self.step > self.stop_at: raise InterruptedException - # Have to unwrap the inpainting conditioning here to perform pre-preocessing + # Have to unwrap the inpainting conditioning here to perform pre-processing image_conditioning = None if isinstance(cond, dict): image_conditioning = cond["c_concat"][0] @@ -146,7 +146,7 @@ class VanillaStableDiffusionSampler: unconditional_conditioning = unconditional_conditioning["c_crossattn"][0] conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) - unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) + unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step) assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers' cond = tensor @@ -165,6 +165,8 @@ class VanillaStableDiffusionSampler: img_orig = self.sampler.model.q_sample(self.init_latent, ts) x_dec = img_orig * self.mask + self.nmask * x_dec + # Wrap the image conditioning back up since the DDIM code can accept the dict directly. + # Note that they need to be lists because it just concatenates them later. if image_conditioning is not None: cond = {"c_concat": [image_conditioning], "c_crossattn": [cond]} unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]} -- cgit v1.2.3 From 45872181902ada06267e2de601586d512cf5df1a Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 09:00:39 +0300 Subject: updated readme and some small stylistic changes to code --- README.md | 1 + modules/processing.py | 14 ++++++-------- modules/sd_hijack_inpainting.py | 3 +++ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'modules/processing.py') diff --git a/README.md b/README.md index 859a91b6..a98bb00b 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - No token limit for prompts (original stable diffusion lets you use up to 75 tokens) - DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args) - [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args) +- Support for dedicated [inpainting model](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion) by RunwayML. ## Installation and Running Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs. diff --git a/modules/processing.py b/modules/processing.py index 539cde38..21786968 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -540,11 +540,10 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f - - def create_dummy_mask(self, x, first_phase: bool = False): + def create_dummy_mask(self, x, width=None, height=None): if self.sampler.conditioning_key in {'hybrid', 'concat'}: - height = self.firstphase_height if first_phase else self.height - width = self.firstphase_width if first_phase else self.width + height = height or self.height + width = width or self.width # The "masked-image" in this case will just be all zeros since the entire image is masked. image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device) @@ -571,7 +570,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): return samples x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) - samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, first_phase=True)) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, self.firstphase_width, self.firstphase_height)) samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2] @@ -634,6 +633,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.inpainting_mask_invert = inpainting_mask_invert self.mask = None self.nmask = None + self.image_conditioning = None def init(self, all_prompts, all_seeds, all_subseeds): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model) @@ -735,9 +735,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask - conditioning_key = self.sampler.conditioning_key - - if conditioning_key in {'hybrid', 'concat'}: + if self.sampler.conditioning_key in {'hybrid', 'concat'}: if self.image_mask is not None: conditioning_mask = np.array(self.image_mask.convert("L")) conditioning_mask = conditioning_mask.astype(np.float32) / 255.0 diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py index 43938071..fd92a335 100644 --- a/modules/sd_hijack_inpainting.py +++ b/modules/sd_hijack_inpainting.py @@ -301,6 +301,7 @@ def get_unconditional_conditioning(self, batch_size, null_label=None): c = repeat(c, "1 ... -> b ...", b=batch_size).to(self.device) return c + class LatentInpaintDiffusion(LatentDiffusion): def __init__( self, @@ -314,9 +315,11 @@ class LatentInpaintDiffusion(LatentDiffusion): assert self.masked_image_key in concat_keys self.concat_keys = concat_keys + def should_hijack_inpainting(checkpoint_info): return str(checkpoint_info.filename).endswith("inpainting.ckpt") and not checkpoint_info.config.endswith("inpainting.yaml") + def do_inpainting_hijack(): ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion -- cgit v1.2.3 From bf30673f5132c8f28357b31224c54331e788d3e7 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 10:19:25 +0300 Subject: Fix Hypernet infotext string split bug for PR #3283 --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index 21786968..d1deffa9 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -304,7 +304,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration "Size": f"{p.width}x{p.height}", "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')), - "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.filename.split('\\')[-1].split('.')[0]), + "Hypernet": (None if shared.loaded_hypernetwork is None else os.path.splitext(os.path.basename(shared.loaded_hypernetwork.filename))[0]), "Batch size": (None if p.batch_size < 2 else p.batch_size), "Batch pos": (None if p.batch_size < 2 else position_in_batch), "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), -- cgit v1.2.3 From df5706409386cc2e88718bd9101045587c39f8bb Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 16:10:51 +0300 Subject: do not load aesthetic clip model until it's needed add refresh button for aesthetic embeddings add aesthetic params to images' infotext --- modules/aesthetic_clip.py | 40 +++++++++++++++++++---- modules/generation_parameters_copypaste.py | 18 +++++++++-- modules/img2img.py | 5 +-- modules/processing.py | 4 +-- modules/sd_models.py | 3 -- modules/txt2img.py | 4 +-- modules/ui.py | 52 ++++++++++++++++++++---------- style.css | 2 +- 8 files changed, 89 insertions(+), 39 deletions(-) (limited to 'modules/processing.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py index 34efa931..8c828541 100644 --- a/modules/aesthetic_clip.py +++ b/modules/aesthetic_clip.py @@ -40,6 +40,8 @@ def iter_to_batched(iterable, n=1): def create_ui(): + import modules.ui + with gr.Group(): with gr.Accordion("Open for Clip Aesthetic!", open=False): with gr.Row(): @@ -55,6 +57,8 @@ def create_ui(): label="Aesthetic imgs embedding", value="None") + modules.ui.create_refresh_button(aesthetic_imgs, shared.update_aesthetic_embeddings, lambda: {"choices": sorted(shared.aesthetic_embeddings.keys())}, "refresh_aesthetic_embeddings") + with gr.Row(): aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", @@ -66,11 +70,21 @@ def create_ui(): return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative +aesthetic_clip_model = None + + +def aesthetic_clip(): + global aesthetic_clip_model + + if aesthetic_clip_model is None or aesthetic_clip_model.name_or_path != shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path: + aesthetic_clip_model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path) + aesthetic_clip_model.cpu() + + return aesthetic_clip_model + + def generate_imgs_embd(name, folder, batch_size): - # clipModel = CLIPModel.from_pretrained( - # shared.sd_model.cond_stage_model.clipModel.name_or_path - # ) - model = shared.clip_model.to(device) + model = aesthetic_clip().to(device) processor = CLIPProcessor.from_pretrained(model.name_or_path) with torch.no_grad(): @@ -91,7 +105,7 @@ def generate_imgs_embd(name, folder, batch_size): path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt") torch.save(embs, path) - model = model.cpu() + model.cpu() del processor del embs gc.collect() @@ -132,7 +146,7 @@ class AestheticCLIP: self.image_embs = None self.load_image_embs(None) - def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, + def set_aesthetic_params(self, p, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, aesthetic_slerp=True, aesthetic_imgs_text="", aesthetic_slerp_angle=0.15, aesthetic_text_negative=False): @@ -145,6 +159,18 @@ class AestheticCLIP: self.aesthetic_steps = aesthetic_steps self.load_image_embs(image_embs_name) + if self.image_embs_name is not None: + p.extra_generation_params.update({ + "Aesthetic LR": aesthetic_lr, + "Aesthetic weight": aesthetic_weight, + "Aesthetic steps": aesthetic_steps, + "Aesthetic embedding": self.image_embs_name, + "Aesthetic slerp": aesthetic_slerp, + "Aesthetic text": aesthetic_imgs_text, + "Aesthetic text negative": aesthetic_text_negative, + "Aesthetic slerp angle": aesthetic_slerp_angle, + }) + def set_skip(self, skip): self.skip = skip @@ -168,7 +194,7 @@ class AestheticCLIP: tokens = torch.asarray(remade_batch_tokens).to(device) - model = copy.deepcopy(shared.clip_model).to(device) + model = copy.deepcopy(aesthetic_clip()).to(device) model.requires_grad_(True) if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: text_embs_2 = model.get_text_features( diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py index 0f041449..f73647da 100644 --- a/modules/generation_parameters_copypaste.py +++ b/modules/generation_parameters_copypaste.py @@ -4,13 +4,22 @@ import gradio as gr from modules.shared import script_path from modules import shared -re_param_code = r"\s*([\w ]+):\s*([^,]+)(?:,|$)" +re_param_code = r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)' re_param = re.compile(re_param_code) re_params = re.compile(r"^(?:" + re_param_code + "){3,}$") re_imagesize = re.compile(r"^(\d+)x(\d+)$") type_of_gr_update = type(gr.update()) +def quote(text): + if ',' not in str(text): + return text + + text = str(text) + text = text.replace('\\', '\\\\') + text = text.replace('"', '\\"') + return f'"{text}"' + def parse_generation_parameters(x: str): """parses generation parameters string, the one you see in text field under the picture in UI: ``` @@ -83,7 +92,12 @@ def connect_paste(button, paste_fields, input_comp, js=None): else: try: valtype = type(output.value) - val = valtype(v) + + if valtype == bool and v == "False": + val = False + else: + val = valtype(v) + res.append(gr.update(value=val)) except Exception: res.append(gr.update()) diff --git a/modules/img2img.py b/modules/img2img.py index bc7c66bc..eea5199b 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -109,10 +109,7 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro inpainting_mask_invert=inpainting_mask_invert, ) - shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), - aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, - aesthetic_slerp_angle, - aesthetic_text_negative) + shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative) if shared.cmd_opts.enable_console_prompts: print(f"\nimg2img: {prompt}", file=shared.progress_print_out) diff --git a/modules/processing.py b/modules/processing.py index d1deffa9..f0852cd5 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -12,7 +12,7 @@ from skimage import exposure from typing import Any, Dict, List, Optional import modules.sd_hijack -from modules import devices, prompt_parser, masking, sd_samplers, lowvram +from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste from modules.sd_hijack import model_hijack from modules.shared import opts, cmd_opts, state import modules.shared as shared @@ -318,7 +318,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration generation_params.update(p.extra_generation_params) - generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None]) + generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None]) negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else "" diff --git a/modules/sd_models.py b/modules/sd_models.py index 05a1df28..b1c91b0d 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -234,9 +234,6 @@ def load_model(checkpoint_info=None): sd_hijack.model_hijack.hijack(sd_model) - if shared.clip_model is None or shared.clip_model.transformer.name_or_path != sd_model.cond_stage_model.wrapped.transformer.name_or_path: - shared.clip_model = CLIPModel.from_pretrained(sd_model.cond_stage_model.wrapped.transformer.name_or_path) - sd_model.eval() print(f"Model loaded.") diff --git a/modules/txt2img.py b/modules/txt2img.py index 32ed1d8d..1761cfa2 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -36,9 +36,7 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: firstphase_height=firstphase_height if enable_hr else None, ) - shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), - aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, - aesthetic_text_negative) + shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative) if cmd_opts.enable_console_prompts: print(f"\ntxt2img: {prompt}", file=shared.progress_print_out) diff --git a/modules/ui.py b/modules/ui.py index 381ca925..0d020de6 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -597,27 +597,29 @@ def apply_setting(key, value): return value -def create_ui(wrap_gradio_gpu_call): - import modules.img2img - import modules.txt2img +def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id): + def refresh(): + refresh_method() + args = refreshed_args() if callable(refreshed_args) else refreshed_args - def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id): - def refresh(): - refresh_method() - args = refreshed_args() if callable(refreshed_args) else refreshed_args + for k, v in args.items(): + setattr(refresh_component, k, v) - for k, v in args.items(): - setattr(refresh_component, k, v) + return gr.update(**(args or {})) - return gr.update(**(args or {})) + refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id) + refresh_button.click( + fn=refresh, + inputs=[], + outputs=[refresh_component] + ) + return refresh_button + + +def create_ui(wrap_gradio_gpu_call): + import modules.img2img + import modules.txt2img - refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id) - refresh_button.click( - fn = refresh, - inputs = [], - outputs = [refresh_component] - ) - return refresh_button with gr.Blocks(analytics_enabled=False) as txt2img_interface: txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button = create_toprow(is_img2img=False) @@ -802,6 +804,14 @@ def create_ui(wrap_gradio_gpu_call): (hr_options, lambda d: gr.Row.update(visible="Denoising strength" in d)), (firstphase_width, "First pass size-1"), (firstphase_height, "First pass size-2"), + (aesthetic_lr, "Aesthetic LR"), + (aesthetic_weight, "Aesthetic weight"), + (aesthetic_steps, "Aesthetic steps"), + (aesthetic_imgs, "Aesthetic embedding"), + (aesthetic_slerp, "Aesthetic slerp"), + (aesthetic_imgs_text, "Aesthetic text"), + (aesthetic_text_negative, "Aesthetic text negative"), + (aesthetic_slerp_angle, "Aesthetic slerp angle"), ] txt2img_preview_params = [ @@ -1077,6 +1087,14 @@ def create_ui(wrap_gradio_gpu_call): (seed_resize_from_w, "Seed resize from-1"), (seed_resize_from_h, "Seed resize from-2"), (denoising_strength, "Denoising strength"), + (aesthetic_lr_im, "Aesthetic LR"), + (aesthetic_weight_im, "Aesthetic weight"), + (aesthetic_steps_im, "Aesthetic steps"), + (aesthetic_imgs_im, "Aesthetic embedding"), + (aesthetic_slerp_im, "Aesthetic slerp"), + (aesthetic_imgs_text_im, "Aesthetic text"), + (aesthetic_text_negative_im, "Aesthetic text negative"), + (aesthetic_slerp_angle_im, "Aesthetic slerp angle"), ] token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter]) diff --git a/style.css b/style.css index 26ae36a5..5d2bacc9 100644 --- a/style.css +++ b/style.css @@ -477,7 +477,7 @@ input[type="range"]{ padding: 0; } -#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization{ +#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization, #refresh_aesthetic_embeddings{ max-width: 2.5em; min-width: 2.5em; height: 2.4em; -- cgit v1.2.3 From fccad18a59e3c2c33fefbbb1763c6a87a3a68eba Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 21 Oct 2022 02:17:26 -0700 Subject: Refer to Hypernet's name, sensibly, by its name variable. --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/processing.py') diff --git a/modules/processing.py b/modules/processing.py index f0852cd5..ff1ec4c9 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -304,7 +304,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration "Size": f"{p.width}x{p.height}", "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')), - "Hypernet": (None if shared.loaded_hypernetwork is None else os.path.splitext(os.path.basename(shared.loaded_hypernetwork.filename))[0]), + "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name), "Batch size": (None if p.batch_size < 2 else p.batch_size), "Batch pos": (None if p.batch_size < 2 else position_in_batch), "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), -- cgit v1.2.3