From bb57f30c2de46cfca5419ad01738a41705f96cc3 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Fri, 14 Oct 2022 10:56:41 +0200 Subject: init --- modules/shared.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'modules/shared.py') diff --git a/modules/shared.py b/modules/shared.py index 5901e605..cf13a10d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -30,6 +30,8 @@ parser.add_argument("--no-half-vae", action='store_true', help="do not switch th parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)") parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI") parser.add_argument("--embeddings-dir", type=str, default=os.path.join(script_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)") +parser.add_argument("--aesthetic_embeddings-dir", type=str, default=os.path.join(script_path, 'aesthetic_embeddings'), + help="aesthetic_embeddings directory(default: aesthetic_embeddings)") parser.add_argument("--hypernetwork-dir", type=str, default=os.path.join(models_path, 'hypernetworks'), help="hypernetwork directory") parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui") parser.add_argument("--medvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a little speed for low VRM usage") @@ -90,6 +92,9 @@ os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True) hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) loaded_hypernetwork = None +aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in + os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} + def reload_hypernetworks(): global hypernetworks -- cgit v1.2.3 From 37d7ffb415cd8c69b3c0bb5f61844dde0b169f78 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sat, 15 Oct 2022 15:59:37 +0200 Subject: fix to tokens lenght, addend embs generator, add new features to edit the embedding before the generation using text --- modules/aesthetic_clip.py | 78 ++++++++++++++++++++++++ modules/processing.py | 148 +++++++++++++++++++++++++++++++--------------- modules/sd_hijack.py | 111 ++++++++++++++++++++++------------ modules/shared.py | 4 ++ modules/txt2img.py | 10 +++- modules/ui.py | 47 ++++++++++++--- 6 files changed, 302 insertions(+), 96 deletions(-) create mode 100644 modules/aesthetic_clip.py (limited to 'modules/shared.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py new file mode 100644 index 00000000..f15cfd47 --- /dev/null +++ b/modules/aesthetic_clip.py @@ -0,0 +1,78 @@ +import itertools +import os +from pathlib import Path +import html +import gc + +import gradio as gr +import torch +from PIL import Image +from modules import shared +from modules.shared import device, aesthetic_embeddings +from transformers import CLIPModel, CLIPProcessor + +from tqdm.auto import tqdm + + +def get_all_images_in_folder(folder): + return [os.path.join(folder, f) for f in os.listdir(folder) if + os.path.isfile(os.path.join(folder, f)) and check_is_valid_image_file(f)] + + +def check_is_valid_image_file(filename): + return filename.lower().endswith(('.png', '.jpg', '.jpeg')) + + +def batched(dataset, total, n=1): + for ndx in range(0, total, n): + yield [dataset.__getitem__(i) for i in range(ndx, min(ndx + n, total))] + + +def 
iter_to_batched(iterable, n=1): + it = iter(iterable) + while True: + chunk = tuple(itertools.islice(it, n)) + if not chunk: + return + yield chunk + + +def generate_imgs_embd(name, folder, batch_size): + # clipModel = CLIPModel.from_pretrained( + # shared.sd_model.cond_stage_model.clipModel.name_or_path + # ) + model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path).to(device) + processor = CLIPProcessor.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path) + + with torch.no_grad(): + embs = [] + for paths in tqdm(iter_to_batched(get_all_images_in_folder(folder), batch_size), + desc=f"Generating embeddings for {name}"): + if shared.state.interrupted: + break + inputs = processor(images=[Image.open(path) for path in paths], return_tensors="pt").to(device) + outputs = model.get_image_features(**inputs).cpu() + embs.append(torch.clone(outputs)) + inputs.to("cpu") + del inputs, outputs + + embs = torch.cat(embs, dim=0).mean(dim=0, keepdim=True) + + # The generated embedding will be located here + path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt") + torch.save(embs, path) + + model = model.cpu() + del model + del processor + del embs + gc.collect() + torch.cuda.empty_cache() + res = f""" + Done generating embedding for {name}! + Hypernetwork saved to {html.escape(path)} + """ + shared.update_aesthetic_embeddings() + return gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", + value=sorted(aesthetic_embeddings.keys())[0] if len( + aesthetic_embeddings) > 0 else None), res, "" diff --git a/modules/processing.py b/modules/processing.py index 9a033759..ab68d63a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -20,7 +20,6 @@ import modules.images as images import modules.styles import logging - # some of those options should not be changed at all because they would break the model, so I removed them from options. 
opt_C = 4 opt_f = 8 @@ -52,8 +51,13 @@ def get_correct_sampler(p): elif isinstance(p, modules.processing.StableDiffusionProcessingImg2Img): return sd_samplers.samplers_for_img2img + class StableDiffusionProcessing: - def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None): + def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, + subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, + sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, + restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, + extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None): self.sd_model = sd_model self.outpath_samples: str = outpath_samples self.outpath_grids: str = outpath_grids @@ -104,7 +108,8 @@ class StableDiffusionProcessing: class Processed: - def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None): + def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, + all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None): self.images = images_list self.prompt = p.prompt self.negative_prompt = p.negative_prompt @@ -141,7 +146,8 @@ class Processed: self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0] self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0] self.seed = int(self.seed if type(self.seed) != list else self.seed[0]) - self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 + self.subseed = int( + self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 self.all_prompts = all_prompts or [self.prompt] self.all_seeds = all_seeds or [self.seed] @@ -181,39 +187,43 @@ class Processed: return json.dumps(obj) - def infotext(self, p: StableDiffusionProcessing, index): - return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size) + def infotext(self, p: StableDiffusionProcessing, index): + return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], + position_in_batch=index % self.batch_size, iteration=index // self.batch_size) # from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3 def slerp(val, low, high): - low_norm = low/torch.norm(low, dim=1, keepdim=True) - high_norm = high/torch.norm(high, dim=1, keepdim=True) - dot = (low_norm*high_norm).sum(1) + low_norm = low / torch.norm(low, dim=1, keepdim=True) + high_norm = high / torch.norm(high, dim=1, keepdim=True) + dot = (low_norm * high_norm).sum(1) if dot.mean() > 0.9995: return low * val + high * (1 - val) omega = torch.acos(dot) so = torch.sin(omega) - res = 
(torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high + res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high return res -def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None): +def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, + p=None): xs = [] # if we have multiple seeds, this means we are working with batch size>1; this then # enables the generation of additional tensors with noise that the sampler will use during its processing. # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to # produce the same images as with two batches [100], [101]. - if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0): + if p is not None and p.sampler is not None and ( + len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0): sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))] else: sampler_noises = None for i, seed in enumerate(seeds): - noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8) + noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else ( + shape[0], seed_resize_from_h // 8, seed_resize_from_w // 8) subnoise = None if subseeds is not None: @@ -241,7 +251,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see dx = max(-dx, 0) dy = max(-dy, 0) - x[:, ty:ty+h, tx:tx+w] = noise[:, dy:dy+h, dx:dx+w] + x[:, ty:ty + h, tx:tx + w] = noise[:, dy:dy + h, dx:dx + w] noise = x if sampler_noises is not None: @@ -293,14 +303,20 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration "Seed": all_seeds[index], "Face restoration": (opts.face_restoration_model if p.restore_faces else None), "Size": f"{p.width}x{p.height}", - "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), - "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')), - "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace(':', '')), + "Model hash": getattr(p, 'sd_model_hash', + None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), + "Model": ( + None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace( + ',', '').replace(':', '')), + "Hypernet": ( + None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace( + ':', '')), "Batch size": (None if p.batch_size < 2 else p.batch_size), "Batch pos": (None if p.batch_size < 2 else position_in_batch), "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength), - "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), + "Seed resize from": ( + None if 
p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), "Denoising strength": getattr(p, 'denoising_strength', None), "Eta": (None if p.sampler is None or p.sampler.eta == p.sampler.default_eta else p.sampler.eta), "Clip skip": None if clip_skip <= 1 else clip_skip, @@ -309,7 +325,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration generation_params.update(p.extra_generation_params) - generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None]) + generation_params_text = ", ".join( + [k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None]) negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else "" @@ -317,7 +334,9 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, - aesthetic_imgs=None,aesthetic_slerp=False) -> Processed: + aesthetic_imgs=None, aesthetic_slerp=False, aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" aesthetic_lr = float(aesthetic_lr) @@ -385,7 +404,7 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh for n in range(p.n_iter): if state.skipped: state.skipped = False - + if state.interrupted: break @@ -396,16 +415,19 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh if (len(prompts) == 0): break - #uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) - #c = p.sd_model.get_learned_conditioning(prompts) + # uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) + # c = p.sd_model.get_learned_conditioning(prompts) with devices.autocast(): if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): - shared.sd_model.cond_stage_model.set_aesthetic_params(0, 0, 0) + shared.sd_model.cond_stage_model.set_aesthetic_params() uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps) if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): shared.sd_model.cond_stage_model.set_aesthetic_params(aesthetic_lr, aesthetic_weight, - aesthetic_steps, aesthetic_imgs,aesthetic_slerp) + aesthetic_steps, aesthetic_imgs, + aesthetic_slerp, aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative) c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps) if len(model_hijack.comments) > 0: @@ -413,13 +435,13 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh comments[comment] = 1 if p.n_iter > 1: - shared.state.job = f"Batch {n+1} out of {p.n_iter}" + shared.state.job = f"Batch {n + 1} out of {p.n_iter}" with devices.autocast(): - samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength) + samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, + subseed_strength=p.subseed_strength) if state.interrupted or state.skipped: - # if we are interrupted, sample returns just noise # use the image collected previously in sampler loop samples_ddim = shared.state.current_latent @@ -445,7 +467,9 
@@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh if p.restore_faces: if opts.save and not p.do_not_save_samples and opts.save_images_before_face_restoration: - images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration") + images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], + opts.samples_format, info=infotext(n, i), p=p, + suffix="-before-face-restoration") devices.torch_gc() @@ -456,7 +480,8 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh if p.color_corrections is not None and i < len(p.color_corrections): if opts.save and not p.do_not_save_samples and opts.save_images_before_color_correction: - images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-color-correction") + images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, + info=infotext(n, i), p=p, suffix="-before-color-correction") image = apply_color_correction(p.color_corrections[i], image) if p.overlay_images is not None and i < len(p.overlay_images): @@ -474,7 +499,8 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh image = image.convert('RGB') if opts.samples_save and not p.do_not_save_samples: - images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p) + images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, + info=infotext(n, i), p=p) text = infotext(n, i) infotexts.append(text) @@ -482,7 +508,7 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh image.info["parameters"] = text output_images.append(image) - del x_samples_ddim + del x_samples_ddim devices.torch_gc() @@ -504,10 +530,13 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh index_of_first_image = 1 if opts.grid_save: - images.save_image(grid, p.outpath_grids, "grid", all_seeds[0], all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True) + images.save_image(grid, p.outpath_grids, "grid", all_seeds[0], all_prompts[0], opts.grid_format, + info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True) devices.torch_gc() - return Processed(p, output_images, all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), subseed=all_subseeds[0], all_prompts=all_prompts, all_seeds=all_seeds, all_subseeds=all_subseeds, index_of_first_image=index_of_first_image, infotexts=infotexts) + return Processed(p, output_images, all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), + subseed=all_subseeds[0], all_prompts=all_prompts, all_seeds=all_seeds, all_subseeds=all_subseeds, + index_of_first_image=index_of_first_image, infotexts=infotexts) class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): @@ -543,25 +572,34 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) if not self.enable_hr: - x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + x 
= create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, + subseeds=subseeds, subseed_strength=self.subseed_strength, + seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) return samples - x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, + subseeds=subseeds, subseed_strength=self.subseed_strength, + seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) truncate_x = (self.firstphase_width - self.firstphase_width_truncated) // opt_f truncate_y = (self.firstphase_height - self.firstphase_height_truncated) // opt_f - samples = samples[:, :, truncate_y//2:samples.shape[2]-truncate_y//2, truncate_x//2:samples.shape[3]-truncate_x//2] + samples = samples[:, :, truncate_y // 2:samples.shape[2] - truncate_y // 2, + truncate_x // 2:samples.shape[3] - truncate_x // 2] if self.scale_latent: - samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), + mode="bilinear") else: decoded_samples = decode_first_stage(self.sd_model, samples) if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None": - decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear") + decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), + mode="bilinear") else: lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) @@ -585,13 +623,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) - noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, + subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) # GC now before running the next img2img to prevent running out of memory x = None devices.torch_gc() - samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps) + samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, + steps=self.steps) return samples @@ -599,7 +640,9 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): sampler = None - def __init__(self, init_images=None, resize_mode=0, denoising_strength=0.75, mask=None, mask_blur=4, inpainting_fill=0, inpaint_full_res=True, inpaint_full_res_padding=0, inpainting_mask_invert=0, **kwargs): + def __init__(self, init_images=None, resize_mode=0, 
denoising_strength=0.75, mask=None, mask_blur=4, + inpainting_fill=0, inpaint_full_res=True, inpaint_full_res_padding=0, inpainting_mask_invert=0, + **kwargs): super().__init__(**kwargs) self.init_images = init_images @@ -607,7 +650,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.denoising_strength: float = denoising_strength self.init_latent = None self.image_mask = mask - #self.image_unblurred_mask = None + # self.image_unblurred_mask = None self.latent_mask = None self.mask_for_overlay = None self.mask_blur = mask_blur @@ -619,7 +662,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.nmask = None def init(self, all_prompts, all_seeds, all_subseeds): - self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model) + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, + self.sd_model) crop_region = None if self.image_mask is not None: @@ -628,7 +672,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.inpainting_mask_invert: self.image_mask = ImageOps.invert(self.image_mask) - #self.image_unblurred_mask = self.image_mask + # self.image_unblurred_mask = self.image_mask if self.mask_blur > 0: self.image_mask = self.image_mask.filter(ImageFilter.GaussianBlur(self.mask_blur)) @@ -642,7 +686,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask = mask.crop(crop_region) self.image_mask = images.resize_image(2, mask, self.width, self.height) - self.paste_to = (x1, y1, x2-x1, y2-y1) + self.paste_to = (x1, y1, x2 - x1, y2 - y1) else: self.image_mask = images.resize_image(self.resize_mode, self.image_mask, self.width, self.height) np_mask = np.array(self.image_mask) @@ -665,7 +709,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.image_mask is not None: image_masked = Image.new('RGBa', (image.width, image.height)) - image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + image_masked.paste(image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) self.overlay_images.append(image_masked.convert('RGBA')) @@ -714,12 +759,17 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): # this needs to be fixed to be done in sample() using actual seeds for batches if self.inpainting_fill == 2: - self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask + self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], + all_seeds[ + 0:self.init_latent.shape[ + 0]]) * self.nmask elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): - x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, + subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, 
unconditional_conditioning) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 6d5196fe..192883b2 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -14,7 +14,8 @@ from modules.sd_hijack_optimizations import invokeAI_mps_available import ldm.modules.attention import ldm.modules.diffusionmodules.model -from transformers import CLIPVisionModel, CLIPModel +from tqdm import trange +from transformers import CLIPVisionModel, CLIPModel, CLIPTokenizer import torch.optim as optim import copy @@ -22,21 +23,25 @@ attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward + def apply_optimizations(): undo_optimizations() ldm.modules.diffusionmodules.model.nonlinearity = silu - if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (8, 6)): + if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and ( + 6, 0) <= torch.cuda.get_device_capability(shared.device) <= (8, 6)): print("Applying xformers cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward elif cmd_opts.opt_split_attention_v1: print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 - elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): + elif not cmd_opts.disable_opt_split_attention and ( + cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): if not invokeAI_mps_available and shared.device.type == 'mps': - print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") + print( + "The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 else: @@ -112,14 +117,16 @@ class StableDiffusionModelHijack: _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text]) return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count) + def slerp(low, high, val): - low_norm = low/torch.norm(low, dim=1, keepdim=True) - high_norm = high/torch.norm(high, dim=1, keepdim=True) - omega = torch.acos((low_norm*high_norm).sum(1)) + low_norm = low / torch.norm(low, dim=1, keepdim=True) + high_norm = high / torch.norm(high, dim=1, keepdim=True) + omega = torch.acos((low_norm * high_norm).sum(1)) so = torch.sin(omega) - res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high + res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high return res + class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): def __init__(self, wrapped, hijack): super().__init__() @@ -128,6 +135,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): self.wrapped.transformer.name_or_path ) del self.clipModel.vision_model + self.tokenizer = 
CLIPTokenizer.from_pretrained(self.wrapped.transformer.name_or_path) self.hijack: StableDiffusionModelHijack = hijack self.tokenizer = wrapped.tokenizer # self.vision = CLIPVisionModel.from_pretrained(self.wrapped.transformer.name_or_path).eval() @@ -139,7 +147,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0] - tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k] + tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if + '(' in k or ')' in k or '[' in k or ']' in k] for text, ident in tokens_with_parens: mult = 1.0 for c in text: @@ -155,8 +164,13 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if mult != 1.0: self.token_mults[ident] = mult - def set_aesthetic_params(self, aesthetic_lr, aesthetic_weight, aesthetic_steps, image_embs_name=None, - aesthetic_slerp=True): + def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, + aesthetic_slerp=True, aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False): + self.aesthetic_imgs_text = aesthetic_imgs_text + self.aesthetic_slerp_angle = aesthetic_slerp_angle + self.aesthetic_text_negative = aesthetic_text_negative self.slerp = aesthetic_slerp self.aesthetic_lr = aesthetic_lr self.aesthetic_weight = aesthetic_weight @@ -180,7 +194,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): else: parsed = [[line, 1.0]] - tokenized = self.wrapped.tokenizer([text for text, _ in parsed], truncation=False, add_special_tokens=False)["input_ids"] + tokenized = self.wrapped.tokenizer([text for text, _ in parsed], truncation=False, add_special_tokens=False)[ + "input_ids"] fixes = [] remade_tokens = [] @@ -196,18 +211,20 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if token == self.comma_token: last_comma = len(remade_tokens) - elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), 1) % 75 == 0 and last_comma != -1 and len(remade_tokens) - last_comma <= opts.comma_padding_backtrack: + elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), + 1) % 75 == 0 and last_comma != -1 and len( + remade_tokens) - last_comma <= opts.comma_padding_backtrack: last_comma += 1 reloc_tokens = remade_tokens[last_comma:] reloc_mults = multipliers[last_comma:] remade_tokens = remade_tokens[:last_comma] length = len(remade_tokens) - + rem = int(math.ceil(length / 75)) * 75 - length remade_tokens += [id_end] * rem + reloc_tokens multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults - + if embedding is None: remade_tokens.append(token) multipliers.append(weight) @@ -248,7 +265,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if line in cache: remade_tokens, fixes, multipliers = cache[line] else: - remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments) + remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, + hijack_comments) token_count = max(current_token_count, token_count) cache[line] = (remade_tokens, fixes, multipliers) @@ -259,7 +277,6 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count - def process_text_old(self, text): id_start = self.wrapped.tokenizer.bos_token_id id_end = 
self.wrapped.tokenizer.eos_token_id @@ -289,7 +306,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): while i < len(tokens): token = tokens[i] - embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i) + embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, + i) mult_change = self.token_mults.get(token) if opts.enable_emphasis else None if mult_change is not None: @@ -312,11 +330,12 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): ovf = remade_tokens[maxlen - 2:] overflowing_words = [vocab.get(int(x), "") for x in ovf] overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words)) - hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n") + hijack_comments.append( + f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n") token_count = len(remade_tokens) remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens)) - remade_tokens = [id_start] + remade_tokens[0:maxlen-2] + [id_end] + remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end] cache[tuple_tokens] = (remade_tokens, fixes, multipliers) multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers)) @@ -326,23 +345,26 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): hijack_fixes.append(fixes) batch_multipliers.append(multipliers) return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count - + def forward(self, text): use_old = opts.use_old_emphasis_implementation if use_old: - batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text) + batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old( + text) else: - batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text(text) + batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text( + text) self.hijack.comments += hijack_comments if len(used_custom_terms) > 0: - self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms])) - + self.hijack.comments.append( + "Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms])) + if use_old: self.hijack.fixes = hijack_fixes return self.process_tokens(remade_batch_tokens, batch_multipliers) - + z = None i = 0 while max(map(len, remade_batch_tokens)) != 0: @@ -356,7 +378,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if fix[0] == i: fixes.append(fix[1]) self.hijack.fixes.append(fixes) - + tokens = [] multipliers = [] for j in range(len(remade_batch_tokens)): @@ -378,19 +400,30 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): remade_batch_tokens] tokens = torch.asarray(remade_batch_tokens).to(device) + + model = copy.deepcopy(self.clipModel).to(device) + model.requires_grad_(True) + if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: + text_embs_2 = model.get_text_features( + **self.tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device)) + if self.aesthetic_text_negative: + text_embs_2 = self.image_embs - text_embs_2 + text_embs_2 /= 
text_embs_2.norm(dim=-1, keepdim=True) + img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle) + else: + img_embs = self.image_embs + with torch.enable_grad(): - model = copy.deepcopy(self.clipModel).to(device) - model.requires_grad_(True) # We optimize the model to maximize the similarity optimizer = optim.Adam( model.text_model.parameters(), lr=self.aesthetic_lr ) - for i in range(self.aesthetic_steps): + for i in trange(self.aesthetic_steps, desc="Aesthetic optimization"): text_embs = model.get_text_features(input_ids=tokens) text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True) - sim = text_embs @ self.image_embs.T + sim = text_embs @ img_embs.T loss = -sim optimizer.zero_grad() loss.mean().backward() @@ -405,6 +438,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): model.cpu() del model + zn = torch.concat([zn for i in range(z.shape[1] // 77)], 1) if self.slerp: z = slerp(z, zn, self.aesthetic_weight) else: @@ -413,15 +447,16 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): remade_batch_tokens = rem_tokens batch_multipliers = rem_multipliers i += 1 - + return z - - + def process_tokens(self, remade_batch_tokens, batch_multipliers): if not opts.use_old_emphasis_implementation: - remade_batch_tokens = [[self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in remade_batch_tokens] + remade_batch_tokens = [ + [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in + remade_batch_tokens] batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers] - + tokens = torch.asarray(remade_batch_tokens).to(device) outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) @@ -461,8 +496,8 @@ class EmbeddingsWithFixes(torch.nn.Module): for fixes, tensor in zip(batch_fixes, inputs_embeds): for offset, embedding in fixes: emb = embedding.vec - emb_len = min(tensor.shape[0]-offset-1, emb.shape[0]) - tensor = torch.cat([tensor[0:offset+1], emb[0:emb_len], tensor[offset+1+emb_len:]]) + emb_len = min(tensor.shape[0] - offset - 1, emb.shape[0]) + tensor = torch.cat([tensor[0:offset + 1], emb[0:emb_len], tensor[offset + 1 + emb_len:]]) vecs.append(tensor) diff --git a/modules/shared.py b/modules/shared.py index cf13a10d..7cd608ca 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -95,6 +95,10 @@ loaded_hypernetwork = None aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} +def update_aesthetic_embeddings(): + global aesthetic_embeddings + aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in + os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} def reload_hypernetworks(): global hypernetworks diff --git a/modules/txt2img.py b/modules/txt2img.py index 78342024..eedcdfe0 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -13,7 +13,11 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, aesthetic_imgs=None, - aesthetic_slerp=False, *args): + aesthetic_slerp=False, + aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False, + *args): p = StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples, @@ -47,7 +51,9 @@ def txt2img(prompt: str, 
negative_prompt: str, prompt_style: str, prompt_style2: processed = modules.scripts.scripts_txt2img.run(p, *args) if processed is None: - processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp) + processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp,aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative) shared.total_tqdm.clear() diff --git a/modules/ui.py b/modules/ui.py index d961d126..e98e2113 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -41,6 +41,7 @@ from modules import prompt_parser from modules.images import save_image import modules.textual_inversion.ui import modules.hypernetworks.ui +import modules.aesthetic_clip # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() @@ -449,7 +450,7 @@ def create_toprow(is_img2img): with gr.Row(): negative_prompt = gr.Textbox(label="Negative prompt", elem_id="negative_prompt", show_label=False, placeholder="Negative prompt", lines=2) with gr.Column(scale=1, elem_id="roll_col"): - sh = gr.Button(elem_id="sh", visible=True) + sh = gr.Button(elem_id="sh", visible=True) with gr.Column(scale=1, elem_id="style_neg_col"): prompt_style2 = gr.Dropdown(label="Style 2", elem_id=f"{id_part}_style2_index", choices=[k for k, v in shared.prompt_styles.styles.items()], value=next(iter(shared.prompt_styles.styles.keys())), visible=len(shared.prompt_styles.styles) > 1) @@ -536,9 +537,13 @@ def create_ui(wrap_gradio_gpu_call): height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) with gr.Group(): - aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005") - aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.7) - aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=50) + aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.0001") + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) + aesthetic_steps = gr.Slider(minimum=0, maximum=256, step=1, label="Aesthetic steps", value=5) + with gr.Row(): + aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") + aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) + aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) @@ -617,7 +622,10 @@ def create_ui(wrap_gradio_gpu_call): aesthetic_weight, aesthetic_steps, aesthetic_imgs, - aesthetic_slerp + aesthetic_slerp, + aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative ] + custom_inputs, outputs=[ txt2img_gallery, @@ -721,7 +729,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution', value=False) - inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=256, step=4, value=32) + inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, 
maximum=1024, step=4, value=32) with gr.TabItem('Batch img2img', id='batch'): hidden = '
Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' @@ -1071,6 +1079,17 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_embedding = gr.Button(value="Create embedding", variant='primary') + with gr.Tab(label="Create images embedding"): + new_embedding_name_ae = gr.Textbox(label="Name") + process_src_ae = gr.Textbox(label='Source directory') + batch_ae = gr.Slider(minimum=1, maximum=1024, step=1, label="Batch size", value=256) + with gr.Row(): + with gr.Column(scale=3): + gr.HTML(value="") + + with gr.Column(): + create_embedding_ae = gr.Button(value="Create images embedding", variant='primary') + with gr.Tab(label="Create hypernetwork"): new_hypernetwork_name = gr.Textbox(label="Name") new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) @@ -1139,7 +1158,7 @@ def create_ui(wrap_gradio_gpu_call): fn=modules.textual_inversion.ui.create_embedding, inputs=[ new_embedding_name, - initialization_text, + process_src, nvpt, ], outputs=[ @@ -1149,6 +1168,20 @@ def create_ui(wrap_gradio_gpu_call): ] ) + create_embedding_ae.click( + fn=modules.aesthetic_clip.generate_imgs_embd, + inputs=[ + new_embedding_name_ae, + process_src_ae, + batch_ae + ], + outputs=[ + aesthetic_imgs, + ti_output, + ti_outcome, + ] + ) + create_hypernetwork.click( fn=modules.hypernetworks.ui.create_hypernetwork, inputs=[ -- cgit v1.2.3 From 523140d7805c644700009b8a2483ff4eb4a22304 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sun, 16 Oct 2022 10:23:30 +0200 Subject: ui fix --- modules/aesthetic_clip.py | 3 +-- modules/sd_hijack.py | 3 +-- modules/shared.py | 2 ++ modules/ui.py | 24 ++++++++++++++---------- 4 files changed, 18 insertions(+), 14 deletions(-) (limited to 'modules/shared.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py index 68264284..ccb35c73 100644 --- a/modules/aesthetic_clip.py +++ b/modules/aesthetic_clip.py @@ -74,5 +74,4 @@ def generate_imgs_embd(name, folder, batch_size): """ shared.update_aesthetic_embeddings() return gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), label="Imgs embedding", - value=sorted(shared.aesthetic_embeddings.keys())[0] if len( - shared.aesthetic_embeddings) > 0 else None), res, "" + value="None"), res, "" diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 01fcb78f..2de2eed5 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -392,8 +392,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): z1 = self.process_tokens(tokens, multipliers) z = z1 if z is None else torch.cat((z, z1), axis=-2) - if len(text[ - 0]) != 0 and self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name != None: + if self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name != None: if not opts.use_old_emphasis_implementation: remade_batch_tokens = [ [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in diff --git a/modules/shared.py b/modules/shared.py index 3c5ffef1..e2c98b2d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -96,11 +96,13 @@ loaded_hypernetwork = None aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} +aesthetic_embeddings = aesthetic_embeddings | {"None": None} def update_aesthetic_embeddings(): global 
aesthetic_embeddings aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} + aesthetic_embeddings = aesthetic_embeddings | {"None": None} def reload_hypernetworks(): global hypernetworks diff --git a/modules/ui.py b/modules/ui.py index 13ba3142..4069f0d2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -594,19 +594,23 @@ def create_ui(wrap_gradio_gpu_call): height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) with gr.Group(): - aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") - - aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) - aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + with gr.Accordion("Open for Clip Aesthetic!",open=False): + with gr.Row(): + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) + aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) - with gr.Row(): - aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") - aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) - aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + with gr.Row(): + aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") + aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) + aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), + label="Aesthetic imgs embedding", + value="None") - aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Aesthetic imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) + with gr.Row(): + aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") + aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) + aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) - aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) with gr.Row(): restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) -- cgit v1.2.3 From 9324cdaa3199d65c182858785dd1eca42b192b8e Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sun, 16 Oct 2022 17:53:56 +0200 Subject: ui fix, re organization of the code --- modules/aesthetic_clip.py | 154 +++++++++++++++++++++++++++++++++-- modules/img2img.py | 14 +++- modules/processing.py | 29 ++----- modules/sd_hijack.py | 102 ++--------------------- modules/sd_models.py | 5 +- modules/shared.py | 14 +++- modules/textual_inversion/dataset.py | 2 +- modules/txt2img.py | 18 ++-- modules/ui.py | 52 +++++++----- 9 files changed, 233 insertions(+), 157 deletions(-) (limited to 'modules/shared.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py index ccb35c73..34efa931 100644 --- a/modules/aesthetic_clip.py +++ b/modules/aesthetic_clip.py @@ -1,3 +1,4 @@ +import copy import itertools import os from pathlib import Path @@ -7,11 +8,12 @@ import gc import gradio as gr import torch from PIL import Image -from modules import shared 
-from modules.shared import device -from transformers import CLIPModel, CLIPProcessor +from torch import optim -from tqdm.auto import tqdm +from modules import shared +from transformers import CLIPModel, CLIPProcessor, CLIPTokenizer +from tqdm.auto import tqdm, trange +from modules.shared import opts, device def get_all_images_in_folder(folder): @@ -37,12 +39,39 @@ def iter_to_batched(iterable, n=1): yield chunk +def create_ui(): + with gr.Group(): + with gr.Accordion("Open for Clip Aesthetic!", open=False): + with gr.Row(): + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", + value=0.9) + aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + + with gr.Row(): + aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', + placeholder="Aesthetic learning rate", value="0.0001") + aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) + aesthetic_imgs = gr.Dropdown(sorted(shared.aesthetic_embeddings.keys()), + label="Aesthetic imgs embedding", + value="None") + + with gr.Row(): + aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', + placeholder="This text is used to rotate the feature space of the imgs embs", + value="") + aesthetic_slerp_angle = gr.Slider(label='Slerp angle', minimum=0, maximum=1, step=0.01, + value=0.1) + aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + + return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative + + def generate_imgs_embd(name, folder, batch_size): # clipModel = CLIPModel.from_pretrained( # shared.sd_model.cond_stage_model.clipModel.name_or_path # ) - model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path).to(device) - processor = CLIPProcessor.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path) + model = shared.clip_model.to(device) + processor = CLIPProcessor.from_pretrained(model.name_or_path) with torch.no_grad(): embs = [] @@ -63,7 +92,6 @@ def generate_imgs_embd(name, folder, batch_size): torch.save(embs, path) model = model.cpu() - del model del processor del embs gc.collect() @@ -74,4 +102,114 @@ def generate_imgs_embd(name, folder, batch_size): """ shared.update_aesthetic_embeddings() return gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), label="Imgs embedding", - value="None"), res, "" + value="None"), \ + gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), + label="Imgs embedding", + value="None"), res, "" + + +def slerp(low, high, val): + low_norm = low / torch.norm(low, dim=1, keepdim=True) + high_norm = high / torch.norm(high, dim=1, keepdim=True) + omega = torch.acos((low_norm * high_norm).sum(1)) + so = torch.sin(omega) + res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high + return res + + +class AestheticCLIP: + def __init__(self): + self.skip = False + self.aesthetic_steps = 0 + self.aesthetic_weight = 0 + self.aesthetic_lr = 0 + self.slerp = False + self.aesthetic_text_negative = "" + self.aesthetic_slerp_angle = 0 + self.aesthetic_imgs_text = "" + + self.image_embs_name = None + self.image_embs = None + self.load_image_embs(None) + + def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, + aesthetic_slerp=True, aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + 
aesthetic_text_negative=False): + self.aesthetic_imgs_text = aesthetic_imgs_text + self.aesthetic_slerp_angle = aesthetic_slerp_angle + self.aesthetic_text_negative = aesthetic_text_negative + self.slerp = aesthetic_slerp + self.aesthetic_lr = aesthetic_lr + self.aesthetic_weight = aesthetic_weight + self.aesthetic_steps = aesthetic_steps + self.load_image_embs(image_embs_name) + + def set_skip(self, skip): + self.skip = skip + + def load_image_embs(self, image_embs_name): + if image_embs_name is None or len(image_embs_name) == 0 or image_embs_name == "None": + image_embs_name = None + self.image_embs_name = None + if image_embs_name is not None and self.image_embs_name != image_embs_name: + self.image_embs_name = image_embs_name + self.image_embs = torch.load(shared.aesthetic_embeddings[self.image_embs_name], map_location=device) + self.image_embs /= self.image_embs.norm(dim=-1, keepdim=True) + self.image_embs.requires_grad_(False) + + def __call__(self, z, remade_batch_tokens): + if not self.skip and self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name is not None: + tokenizer = shared.sd_model.cond_stage_model.tokenizer + if not opts.use_old_emphasis_implementation: + remade_batch_tokens = [ + [tokenizer.bos_token_id] + x[:75] + [tokenizer.eos_token_id] for x in + remade_batch_tokens] + + tokens = torch.asarray(remade_batch_tokens).to(device) + + model = copy.deepcopy(shared.clip_model).to(device) + model.requires_grad_(True) + if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: + text_embs_2 = model.get_text_features( + **tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device)) + if self.aesthetic_text_negative: + text_embs_2 = self.image_embs - text_embs_2 + text_embs_2 /= text_embs_2.norm(dim=-1, keepdim=True) + img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle) + else: + img_embs = self.image_embs + + with torch.enable_grad(): + + # We optimize the model to maximize the similarity + optimizer = optim.Adam( + model.text_model.parameters(), lr=self.aesthetic_lr + ) + + for _ in trange(self.aesthetic_steps, desc="Aesthetic optimization"): + text_embs = model.get_text_features(input_ids=tokens) + text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True) + sim = text_embs @ img_embs.T + loss = -sim + optimizer.zero_grad() + loss.mean().backward() + optimizer.step() + + zn = model.text_model(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) + if opts.CLIP_stop_at_last_layers > 1: + zn = zn.hidden_states[-opts.CLIP_stop_at_last_layers] + zn = model.text_model.final_layer_norm(zn) + else: + zn = zn.last_hidden_state + model.cpu() + del model + gc.collect() + torch.cuda.empty_cache() + zn = torch.concat([zn[77 * i:77 * (i + 1)] for i in range(max(z.shape[1] // 77, 1))], 1) + if self.slerp: + z = slerp(z, zn, self.aesthetic_weight) + else: + z = z * (1 - self.aesthetic_weight) + zn * self.aesthetic_weight + + return z diff --git a/modules/img2img.py b/modules/img2img.py index 24126774..4ed80c4b 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -56,7 +56,14 @@ def process_batch(p, input_dir, output_dir, args): processed_image.save(os.path.join(output_dir, filename)) -def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, 
tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, *args): +def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, + aesthetic_lr=0, + aesthetic_weight=0, aesthetic_steps=0, + aesthetic_imgs=None, + aesthetic_slerp=False, + aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False, *args): is_inpaint = mode == 1 is_batch = mode == 2 @@ -109,6 +116,11 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro inpainting_mask_invert=inpainting_mask_invert, ) + shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), + aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative) + if shared.cmd_opts.enable_console_prompts: print(f"\nimg2img: {prompt}", file=shared.progress_print_out) diff --git a/modules/processing.py b/modules/processing.py index 1db26c3e..685f9fcd 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -146,7 +146,8 @@ class Processed: self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0] self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0] self.seed = int(self.seed if type(self.seed) != list else self.seed[0]) if self.seed is not None else -1 - self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 + self.subseed = int( + self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 self.all_prompts = all_prompts or [self.prompt] self.all_seeds = all_seeds or [self.seed] @@ -332,16 +333,9 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip() -def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, - aesthetic_imgs=None, aesthetic_slerp=False, aesthetic_imgs_text="", - aesthetic_slerp_angle=0.15, - aesthetic_text_negative=False) -> Processed: +def process_images(p: StableDiffusionProcessing) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" - aesthetic_lr = float(aesthetic_lr) - aesthetic_weight = float(aesthetic_weight) - aesthetic_steps = int(aesthetic_steps) - if type(p.prompt) == list: assert (len(p.prompt) > 0) else: @@ -417,16 +411,10 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh # 
uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) # c = p.sd_model.get_learned_conditioning(prompts) with devices.autocast(): - if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): - shared.sd_model.cond_stage_model.set_aesthetic_params() + shared.aesthetic_clip.set_skip(True) uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps) - if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): - shared.sd_model.cond_stage_model.set_aesthetic_params(aesthetic_lr, aesthetic_weight, - aesthetic_steps, aesthetic_imgs, - aesthetic_slerp, aesthetic_imgs_text, - aesthetic_slerp_angle, - aesthetic_text_negative) + shared.aesthetic_clip.set_skip(False) c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps) if len(model_hijack.comments) > 0: @@ -582,7 +570,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f - def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) @@ -600,10 +587,12 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) - samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2] + samples = samples[:, :, self.truncate_y // 2:samples.shape[2] - self.truncate_y // 2, + self.truncate_x // 2:samples.shape[3] - self.truncate_x // 2] if opts.use_scale_latent_for_hires_fix: - samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), + mode="bilinear") else: decoded_samples = decode_first_stage(self.sd_model, samples) lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 5d0590af..227e7670 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -29,8 +29,8 @@ def apply_optimizations(): ldm.modules.diffusionmodules.model.nonlinearity = silu - - if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)): + if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and ( + 6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)): print("Applying xformers cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward @@ -118,33 +118,14 @@ class StableDiffusionModelHijack: return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count) -def slerp(low, high, val): - low_norm = low / torch.norm(low, dim=1, keepdim=True) - high_norm = high / torch.norm(high, dim=1, keepdim=True) - omega = torch.acos((low_norm * high_norm).sum(1)) - so = torch.sin(omega) - res = (torch.sin((1.0 - val) * 
omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high - return res - - class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): def __init__(self, wrapped, hijack): super().__init__() self.wrapped = wrapped - self.clipModel = CLIPModel.from_pretrained( - self.wrapped.transformer.name_or_path - ) - del self.clipModel.vision_model - self.tokenizer = CLIPTokenizer.from_pretrained(self.wrapped.transformer.name_or_path) - self.hijack: StableDiffusionModelHijack = hijack - self.tokenizer = wrapped.tokenizer - # self.vision = CLIPVisionModel.from_pretrained(self.wrapped.transformer.name_or_path).eval() - self.image_embs_name = None - self.image_embs = None - self.load_image_embs(None) self.token_mults = {} - + self.hijack: StableDiffusionModelHijack = hijack + self.tokenizer = wrapped.tokenizer self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0] tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if @@ -164,28 +145,6 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if mult != 1.0: self.token_mults[ident] = mult - def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, - aesthetic_slerp=True, aesthetic_imgs_text="", - aesthetic_slerp_angle=0.15, - aesthetic_text_negative=False): - self.aesthetic_imgs_text = aesthetic_imgs_text - self.aesthetic_slerp_angle = aesthetic_slerp_angle - self.aesthetic_text_negative = aesthetic_text_negative - self.slerp = aesthetic_slerp - self.aesthetic_lr = aesthetic_lr - self.aesthetic_weight = aesthetic_weight - self.aesthetic_steps = aesthetic_steps - self.load_image_embs(image_embs_name) - - def load_image_embs(self, image_embs_name): - if image_embs_name is None or len(image_embs_name) == 0 or image_embs_name == "None": - image_embs_name = None - if image_embs_name is not None and self.image_embs_name != image_embs_name: - self.image_embs_name = image_embs_name - self.image_embs = torch.load(shared.aesthetic_embeddings[self.image_embs_name], map_location=device) - self.image_embs /= self.image_embs.norm(dim=-1, keepdim=True) - self.image_embs.requires_grad_(False) - def tokenize_line(self, line, used_custom_terms, hijack_comments): id_end = self.wrapped.tokenizer.eos_token_id @@ -391,58 +350,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): z1 = self.process_tokens(tokens, multipliers) z = z1 if z is None else torch.cat((z, z1), axis=-2) - - if self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name != None: - if not opts.use_old_emphasis_implementation: - remade_batch_tokens = [ - [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in - remade_batch_tokens] - - tokens = torch.asarray(remade_batch_tokens).to(device) - - model = copy.deepcopy(self.clipModel).to(device) - model.requires_grad_(True) - if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: - text_embs_2 = model.get_text_features( - **self.tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device)) - if self.aesthetic_text_negative: - text_embs_2 = self.image_embs - text_embs_2 - text_embs_2 /= text_embs_2.norm(dim=-1, keepdim=True) - img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle) - else: - img_embs = self.image_embs - - with torch.enable_grad(): - - # We optimize the model to maximize the similarity - optimizer = optim.Adam( - model.text_model.parameters(), 
lr=self.aesthetic_lr - ) - - for i in trange(self.aesthetic_steps, desc="Aesthetic optimization"): - text_embs = model.get_text_features(input_ids=tokens) - text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True) - sim = text_embs @ img_embs.T - loss = -sim - optimizer.zero_grad() - loss.mean().backward() - optimizer.step() - - zn = model.text_model(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) - if opts.CLIP_stop_at_last_layers > 1: - zn = zn.hidden_states[-opts.CLIP_stop_at_last_layers] - zn = model.text_model.final_layer_norm(zn) - else: - zn = zn.last_hidden_state - model.cpu() - del model - - zn = torch.concat([zn for i in range(z.shape[1] // 77)], 1) - if self.slerp: - z = slerp(z, zn, self.aesthetic_weight) - else: - z = z * (1 - self.aesthetic_weight) + zn * self.aesthetic_weight - + z = shared.aesthetic_clip(z, remade_batch_tokens) remade_batch_tokens = rem_tokens batch_multipliers = rem_multipliers i += 1 diff --git a/modules/sd_models.py b/modules/sd_models.py index 3aa21ec1..8e4ee435 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -20,7 +20,7 @@ checkpoints_loaded = collections.OrderedDict() try: # this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start. - from transformers import logging + from transformers import logging, CLIPModel logging.set_verbosity_error() except Exception: @@ -196,6 +196,9 @@ def load_model(): sd_hijack.model_hijack.hijack(sd_model) + if shared.clip_model is None or shared.clip_model.transformer.name_or_path != sd_model.cond_stage_model.wrapped.transformer.name_or_path: + shared.clip_model = CLIPModel.from_pretrained(sd_model.cond_stage_model.wrapped.transformer.name_or_path) + sd_model.eval() print(f"Model loaded.") diff --git a/modules/shared.py b/modules/shared.py index e2c98b2d..e19ca779 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -3,6 +3,7 @@ import datetime import json import os import sys +from collections import OrderedDict import gradio as gr import tqdm @@ -94,15 +95,15 @@ os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True) hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) loaded_hypernetwork = None -aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in - os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} -aesthetic_embeddings = aesthetic_embeddings | {"None": None} +aesthetic_embeddings = {} def update_aesthetic_embeddings(): global aesthetic_embeddings aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} - aesthetic_embeddings = aesthetic_embeddings | {"None": None} + aesthetic_embeddings = OrderedDict(**{"None": None}, **aesthetic_embeddings) + +update_aesthetic_embeddings() def reload_hypernetworks(): global hypernetworks @@ -381,6 +382,11 @@ sd_upscalers = [] sd_model = None +clip_model = None + +from modules.aesthetic_clip import AestheticCLIP +aesthetic_clip = AestheticCLIP() + progress_print_out = sys.stdout diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 68ceffe3..23bb4b6a 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -49,7 +49,7 @@ class PersonalizedBase(Dataset): print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): try: - image = Image.open(path).convert('RGB').resize((self.width, 
self.height), PIL.Image.Resampling.BICUBIC) + image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC) except Exception: continue diff --git a/modules/txt2img.py b/modules/txt2img.py index 8f394d05..6cbc50fc 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -1,12 +1,17 @@ import modules.scripts -from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images +from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, \ + StableDiffusionProcessingImg2Img, process_images from modules.shared import opts, cmd_opts import modules.shared as shared import modules.processing as processing from modules.ui import plaintext_to_html -def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, firstphase_height: int,aesthetic_lr=0, +def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, + restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, + subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, + height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, + firstphase_height: int, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, aesthetic_imgs=None, aesthetic_slerp=False, @@ -41,15 +46,17 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: firstphase_height=firstphase_height if enable_hr else None, ) + shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), + aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, + aesthetic_text_negative) + if cmd_opts.enable_console_prompts: print(f"\ntxt2img: {prompt}", file=shared.progress_print_out) processed = modules.scripts.scripts_txt2img.run(p, *args) if processed is None: - processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp,aesthetic_imgs_text, - aesthetic_slerp_angle, - aesthetic_text_negative) + processed = process_images(p) shared.total_tqdm.clear() @@ -61,4 +68,3 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: processed.images = [] return processed.images, generation_info_js, plaintext_to_html(processed.info) - diff --git a/modules/ui.py b/modules/ui.py index 4069f0d2..0e5d73f0 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -43,7 +43,7 @@ from modules.images import save_image import modules.textual_inversion.ui import modules.hypernetworks.ui -import modules.aesthetic_clip +import modules.aesthetic_clip as aesthetic_clip import modules.images_history as img_his @@ -593,23 +593,25 @@ def create_ui(wrap_gradio_gpu_call): width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) - with gr.Group(): - with gr.Accordion("Open for Clip Aesthetic!",open=False): - with gr.Row(): - aesthetic_weight = gr.Slider(minimum=0, maximum=1, 
step=0.01, label="Aesthetic weight", value=0.9) - aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) - - with gr.Row(): - aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") - aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) - aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), - label="Aesthetic imgs embedding", - value="None") - - with gr.Row(): - aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") - aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) - aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + # with gr.Group(): + # with gr.Accordion("Open for Clip Aesthetic!",open=False): + # with gr.Row(): + # aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) + # aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + # + # with gr.Row(): + # aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") + # aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) + # aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), + # label="Aesthetic imgs embedding", + # value="None") + # + # with gr.Row(): + # aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") + # aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) + # aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + + aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative = aesthetic_clip.create_ui() with gr.Row(): @@ -840,6 +842,9 @@ def create_ui(wrap_gradio_gpu_call): width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + aesthetic_weight_im, aesthetic_steps_im, aesthetic_lr_im, aesthetic_slerp_im, aesthetic_imgs_im, aesthetic_imgs_text_im, aesthetic_slerp_angle_im, aesthetic_text_negative_im = aesthetic_clip.create_ui() + + with gr.Row(): restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) tiling = gr.Checkbox(label='Tiling', value=False) @@ -944,6 +949,14 @@ def create_ui(wrap_gradio_gpu_call): inpainting_mask_invert, img2img_batch_input_dir, img2img_batch_output_dir, + aesthetic_lr_im, + aesthetic_weight_im, + aesthetic_steps_im, + aesthetic_imgs_im, + aesthetic_slerp_im, + aesthetic_imgs_text_im, + aesthetic_slerp_angle_im, + aesthetic_text_negative_im, ] + custom_inputs, outputs=[ img2img_gallery, @@ -1283,7 +1296,7 @@ def create_ui(wrap_gradio_gpu_call): ) create_embedding_ae.click( - fn=modules.aesthetic_clip.generate_imgs_embd, + fn=aesthetic_clip.generate_imgs_embd, inputs=[ new_embedding_name_ae, process_src_ae, @@ -1291,6 +1304,7 @@ def create_ui(wrap_gradio_gpu_call): ], outputs=[ aesthetic_imgs, + aesthetic_imgs_im, ti_output, ti_outcome, ] -- cgit v1.2.3 From 60251c9456f5472784862896c2f97e38feb42482 Mon Sep 17 00:00:00 2001 From: arcticfaded Date: Mon, 17 Oct 2022 06:58:42 +0000 Subject: initial prototype by borrowing contracts --- 
modules/api/api.py | 60 ++++++++++++++++++++++++++++++++++++++++++++ modules/processing.py | 2 +- modules/shared.py | 2 +- webui.py | 69 +++++++++++++++++++++++++++++---------------------- 4 files changed, 102 insertions(+), 31 deletions(-) create mode 100644 modules/api/api.py (limited to 'modules/shared.py') diff --git a/modules/api/api.py b/modules/api/api.py new file mode 100644 index 00000000..9d7c699d --- /dev/null +++ b/modules/api/api.py @@ -0,0 +1,60 @@ +from modules.api.processing import StableDiffusionProcessingAPI +from modules.processing import StableDiffusionProcessingTxt2Img, process_images +import modules.shared as shared +import uvicorn +from fastapi import FastAPI, Body, APIRouter +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field, Json +import json +import io +import base64 + +app = FastAPI() + +class TextToImageResponse(BaseModel): + images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.") + parameters: Json + info: Json + + +class Api: + def __init__(self, txt2img, img2img, run_extras, run_pnginfo): + self.router = APIRouter() + app.add_api_route("/v1/txt2img", self.text2imgapi, methods=["POST"]) + + def text2imgapi(self, txt2imgreq: StableDiffusionProcessingAPI ): + print(txt2imgreq) + p = StableDiffusionProcessingTxt2Img(**vars(txt2imgreq)) + p.sd_model = shared.sd_model + print(p) + processed = process_images(p) + + b64images = [] + for i in processed.images: + buffer = io.BytesIO() + i.save(buffer, format="png") + b64images.append(base64.b64encode(buffer.getvalue())) + + response = { + "images": b64images, + "info": processed.js(), + "parameters": json.dumps(vars(txt2imgreq)) + } + + + return TextToImageResponse(images=b64images, parameters=json.dumps(vars(txt2imgreq)), info=json.dumps(processed.info)) + + + + def img2imgendoint(self): + raise NotImplementedError + + def extrasendoint(self): + raise NotImplementedError + + def pnginfoendoint(self): + raise NotImplementedError + + def launch(self, server_name, port): + app.include_router(self.router) + uvicorn.run(app, host=server_name, port=port) \ No newline at end of file diff --git a/modules/processing.py b/modules/processing.py index deb6125e..4a7c6ccc 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -723,4 +723,4 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): del x devices.torch_gc() - return samples + return samples \ No newline at end of file diff --git a/modules/shared.py b/modules/shared.py index c2775603..6c6405fd 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -74,7 +74,7 @@ parser.add_argument("--disable-console-progressbars", action='store_true', help= parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None) parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) - +parser.add_argument("--api", action='store_true', help="use api=True to launch the api instead of the webui") cmd_opts = parser.parse_args() restricted_opts = [ diff --git a/webui.py b/webui.py index fe0ce321..cd8a99ea 100644 --- a/webui.py +++ b/webui.py @@ -97,40 +97,51 @@ def webui(): os._exit(0) signal.signal(signal.SIGINT, sigint_handler) + + if cmd_opts.api: + from modules.api.api import Api + api = 
Api(txt2img=modules.txt2img.txt2img, + img2img=modules.img2img.img2img, + run_extras=modules.extras.run_extras, + run_pnginfo=modules.extras.run_pnginfo) - while 1: - - demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) - - app, local_url, share_url = demo.launch( - share=cmd_opts.share, - server_name="0.0.0.0" if cmd_opts.listen else None, - server_port=cmd_opts.port, - debug=cmd_opts.gradio_debug, - auth=[tuple(cred.split(':')) for cred in cmd_opts.gradio_auth.strip('"').split(',')] if cmd_opts.gradio_auth else None, - inbrowser=cmd_opts.autolaunch, - prevent_thread_lock=True - ) - - app.add_middleware(GZipMiddleware, minimum_size=1000) + api.launch(server_name="0.0.0.0" if cmd_opts.listen else "127.0.0.1", + port=cmd_opts.port if cmd_opts.port else 7861) + else: while 1: - time.sleep(0.5) - if getattr(demo, 'do_restart', False): - time.sleep(0.5) - demo.close() - time.sleep(0.5) - break - sd_samplers.set_samplers() + demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) - print('Reloading Custom Scripts') - modules.scripts.reload_scripts(os.path.join(script_path, "scripts")) - print('Reloading modules: modules.ui') - importlib.reload(modules.ui) - print('Refreshing Model List') - modules.sd_models.list_models() - print('Restarting Gradio') + app, local_url, share_url = demo.launch( + share=cmd_opts.share, + server_name="0.0.0.0" if cmd_opts.listen else None, + server_port=cmd_opts.port, + debug=cmd_opts.gradio_debug, + auth=[tuple(cred.split(':')) for cred in cmd_opts.gradio_auth.strip('"').split(',')] if cmd_opts.gradio_auth else None, + inbrowser=cmd_opts.autolaunch, + prevent_thread_lock=True + ) + + app.add_middleware(GZipMiddleware, minimum_size=1000) + + while 1: + time.sleep(0.5) + if getattr(demo, 'do_restart', False): + time.sleep(0.5) + demo.close() + time.sleep(0.5) + break + + sd_samplers.set_samplers() + + print('Reloading Custom Scripts') + modules.scripts.reload_scripts(os.path.join(script_path, "scripts")) + print('Reloading modules: modules.ui') + importlib.reload(modules.ui) + print('Refreshing Model List') + modules.sd_models.list_models() + print('Restarting Gradio') if __name__ == "__main__": -- cgit v1.2.3 From 8c6a981d5d9ef30381ac2327460285111550acbc Mon Sep 17 00:00:00 2001 From: Michoko Date: Mon, 17 Oct 2022 11:05:05 +0200 Subject: Added dark mode switch Launch the UI in dark mode with the --dark-mode switch --- javascript/ui.js | 7 +++++++ modules/shared.py | 2 +- modules/ui.py | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'modules/shared.py') diff --git a/javascript/ui.js b/javascript/ui.js index 9e1bed4c..bfa72885 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -1,5 +1,12 @@ // various functions for interation with ui.py not large enough to warrant putting them in separate files +function go_dark_mode(){ + gradioURL = window.location.href + if (!gradioURL.endsWith('?__theme=dark')) { + window.location.replace(gradioURL + '?__theme=dark'); + } +} + function selected_gallery_index(){ var buttons = gradioApp().querySelectorAll('[style="display: block;"].tabitem .gallery-item') var button = gradioApp().querySelector('[style="display: block;"].tabitem .gallery-item.\\!ring-2') diff --git a/modules/shared.py b/modules/shared.py index c2775603..cbf158e4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -69,13 +69,13 @@ parser.add_argument("--gradio-img2img-tool", type=str, help='gradio image upload parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for 
stable diffusion to channels last") parser.add_argument("--styles-file", type=str, help="filename to use for styles", default=os.path.join(script_path, 'styles.csv')) parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False) +parser.add_argument("--dark-mode", action='store_true', help="launches the UI in dark mode", default=False) parser.add_argument("--use-textbox-seed", action='store_true', help="use textbox for seeds in UI (no up/down, but possible to input long seeds)", default=False) parser.add_argument("--disable-console-progressbars", action='store_true', help="do not output progressbars to console", default=False) parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None) parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) - cmd_opts = parser.parse_args() restricted_opts = [ "samples_filename_pattern", diff --git a/modules/ui.py b/modules/ui.py index 43dc88fc..a0cd052e 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1783,6 +1783,8 @@ for filename in sorted(os.listdir(jsdir)): with open(os.path.join(jsdir, filename), "r", encoding="utf8") as jsfile: javascript += f"\n" +if cmd_opts.dark_mode: + javascript += "\n\n" if 'gradio_routes_templates_response' not in globals(): def template_response(*args, **kwargs): -- cgit v1.2.3 From 665beebc0825a6fad410c8252f27f6f6f0bd900b Mon Sep 17 00:00:00 2001 From: Michoko Date: Mon, 17 Oct 2022 18:24:24 +0200 Subject: Use of a --theme argument for more flexibility Added possibility to set the theme (light or dark) --- javascript/ui.js | 6 +++--- modules/shared.py | 2 +- modules/ui.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'modules/shared.py') diff --git a/javascript/ui.js b/javascript/ui.js index bfa72885..cfd0dcd3 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -1,9 +1,9 @@ // various functions for interation with ui.py not large enough to warrant putting them in separate files -function go_dark_mode(){ +function set_theme(theme){ gradioURL = window.location.href - if (!gradioURL.endsWith('?__theme=dark')) { - window.location.replace(gradioURL + '?__theme=dark'); + if (!gradioURL.includes('?__theme=')) { + window.location.replace(gradioURL + '?__theme=' + theme); } } diff --git a/modules/shared.py b/modules/shared.py index cbf158e4..fa084c69 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -69,7 +69,7 @@ parser.add_argument("--gradio-img2img-tool", type=str, help='gradio image upload parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for stable diffusion to channels last") parser.add_argument("--styles-file", type=str, help="filename to use for styles", default=os.path.join(script_path, 'styles.csv')) parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False) -parser.add_argument("--dark-mode", action='store_true', help="launches the UI in dark mode", default=False) +parser.add_argument("--theme", type=str, help="launches the UI with light or dark theme", default=None) parser.add_argument("--use-textbox-seed", action='store_true', help="use textbox for seeds in UI (no up/down, but possible to input long 
seeds)", default=False) parser.add_argument("--disable-console-progressbars", action='store_true', help="do not output progressbars to console", default=False) parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) diff --git a/modules/ui.py b/modules/ui.py index a0cd052e..d41715fa 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1783,8 +1783,8 @@ for filename in sorted(os.listdir(jsdir)): with open(os.path.join(jsdir, filename), "r", encoding="utf8") as jsfile: javascript += f"\n" -if cmd_opts.dark_mode: - javascript += "\n\n" +if cmd_opts.theme is not None: + javascript += f"\n\n" if 'gradio_routes_templates_response' not in globals(): def template_response(*args, **kwargs): -- cgit v1.2.3 From 1df3ff25e6fe2e3f308e45f7a6dd37fb4f1988e6 Mon Sep 17 00:00:00 2001 From: Ryan Voots Date: Mon, 17 Oct 2022 12:58:34 -0400 Subject: Add --nowebui as a means of disabling the webui and run on the other port --- modules/shared.py | 3 ++- webui.py | 35 +++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'modules/shared.py') diff --git a/modules/shared.py b/modules/shared.py index 6c6405fd..8b436970 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -74,7 +74,8 @@ parser.add_argument("--disable-console-progressbars", action='store_true', help= parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None) parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) -parser.add_argument("--api", action='store_true', help="use api=True to launch the api instead of the webui") +parser.add_argument("--api", action='store_true', help="use api=True to launch the api with the webui") +parser.add_argument("--nowebui", action='store_true', help="use api=True to launch the api instead of the webui") cmd_opts = parser.parse_args() restricted_opts = [ diff --git a/webui.py b/webui.py index 6b55fbed..6212be01 100644 --- a/webui.py +++ b/webui.py @@ -95,16 +95,34 @@ def initialize(): signal.signal(signal.SIGINT, sigint_handler) -def api(): +def create_api(app): from modules.api.api import Api api = Api(app) + return api + +def wait_on_server(demo=None): + while 1: + time.sleep(0.5) + if demo and getattr(demo, 'do_restart', False): + time.sleep(0.5) + demo.close() + time.sleep(0.5) + break + +def api_only(): + initialize() + + app = FastAPI() + app.add_middleware(GZipMiddleware, minimum_size=1000) + api = create_api(app) + + api.launch(server_name="0.0.0.0" if cmd_opts.listen else "127.0.0.1", port=cmd_opts.port if cmd_opts.port else 7861) def webui(launch_api=False): initialize() while 1: - demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) app, local_url, share_url = demo.launch( @@ -120,15 +138,9 @@ def webui(launch_api=False): app.add_middleware(GZipMiddleware, minimum_size=1000) if (launch_api): - api(app) + create_api(app) - while 1: - time.sleep(0.5) - if getattr(demo, 'do_restart', False): - time.sleep(0.5) - demo.close() - time.sleep(0.5) - break + wait_on_server(demo) sd_samplers.set_samplers() @@ -142,6 +154,9 @@ def webui(launch_api=False): if __name__ == "__main__": + if not cmd_opts.nowebui: + api_only() + if cmd_opts.api: webui(True) else: -- cgit 
v1.2.3 From 8b02662215917d39f76f86b703a322818d5a8ad4 Mon Sep 17 00:00:00 2001 From: trufty Date: Mon, 17 Oct 2022 10:58:21 -0400 Subject: Disable auto weights swap with config option --- modules/shared.py | 1 + modules/ui.py | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'modules/shared.py') diff --git a/modules/shared.py b/modules/shared.py index 9603d26e..8a1d1881 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -266,6 +266,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "enable_emphasis": OptionInfo(True, "Emphasis: use (text) to make model pay more attention to text and [text] to make it pay less attention"), "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."), "enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"), + "disable_weights_auto_swap": OptionInfo(False, "Disable auto swapping weights to match model hash in prompts"), "comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }), "filter_nsfw": OptionInfo(False, "Filter NSFW content"), 'CLIP_stop_at_last_layers': OptionInfo(1, "Stop At last layers of CLIP model", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}), diff --git a/modules/ui.py b/modules/ui.py index 1dae4a65..75eb0b0c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -542,6 +542,10 @@ def apply_setting(key, value): if value is None: return gr.update() + # dont allow model to be swapped when model hash exists in prompt + if key == "sd_model_checkpoint" and opts.disable_weights_auto_swap: + return gr.update() + if key == "sd_model_checkpoint": ckpt_info = sd_models.get_closet_checkpoint_match(value) -- cgit v1.2.3 From d2f459c5cf9f728256775dc1c3380c7e9a7e27fb Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 18 Oct 2022 14:22:52 +0300 Subject: clarify the comment for the new option from #2959 and move it to UI section. --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/shared.py') diff --git a/modules/shared.py b/modules/shared.py index 8a1d1881..c0d87168 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -266,7 +266,6 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "enable_emphasis": OptionInfo(True, "Emphasis: use (text) to make model pay more attention to text and [text] to make it pay less attention"), "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. 
Can be useful to reproduce old seeds."), "enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"), - "disable_weights_auto_swap": OptionInfo(False, "Disable auto swapping weights to match model hash in prompts"), "comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }), "filter_nsfw": OptionInfo(False, "Filter NSFW content"), 'CLIP_stop_at_last_layers': OptionInfo(1, "Stop At last layers of CLIP model", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}), @@ -294,6 +293,7 @@ options_templates.update(options_section(('ui', "User interface"), { "do_not_show_images": OptionInfo(False, "Do not show any images in results for web"), "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"), "add_model_name_to_info": OptionInfo(False, "Add model name to generation information"), + "disable_weights_auto_swap": OptionInfo(False, "When reading generation parameters from text into UI (from PNG info or pasted text), do not change the selected model/checkpoint."), "font": OptionInfo("", "Font for image grids that have text"), "js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"), "js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"), -- cgit v1.2.3 From 4c605c5174a9b211c3a88e9aff5f5be92b53fd92 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 16 Oct 2022 17:24:06 +0100 Subject: add shared option for update check --- modules/shared.py | 1 + 1 file changed, 1 insertion(+) (limited to 'modules/shared.py') diff --git a/modules/shared.py b/modules/shared.py index c0d87168..50dc46ae 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -76,6 +76,7 @@ parser.add_argument("--disable-console-progressbars", action='store_true', help= parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None) parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) +parser.add_argument("--update-check", action='store_true', help="enable http check to confirm that the currently running version is the most recent release.", default=False) cmd_opts = parser.parse_args() restricted_opts = [ -- cgit v1.2.3 From 433a7525c1f5eb5963340e0cc45d31038ede3f7e Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 18 Oct 2022 15:18:02 +0300 Subject: remove shared option for update check (because it is not an argument of webui) have launch.py examine both COMMANDLINE_ARGS as well as argv for its arguments --- launch.py | 19 +++++++++---------- modules/shared.py | 1 - 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'modules/shared.py') diff --git a/launch.py b/launch.py index 9d9dd1b6..7b15e78e 100644 --- a/launch.py +++ b/launch.py @@ -127,13 +127,14 @@ def prepare_enviroment(): codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af") blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9") - args = shlex.split(commandline_args) + sys.argv += shlex.split(commandline_args) - 
args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test') - args, reinstall_xformers = extract_arg(args, '--reinstall-xformers') - xformers = '--xformers' in args - deepdanbooru = '--deepdanbooru' in args - ngrok = '--ngrok' in args + sys.argv, skip_torch_cuda_test = extract_arg(sys.argv, '--skip-torch-cuda-test') + sys.argv, reinstall_xformers = extract_arg(sys.argv, '--reinstall-xformers') + sys.argv, update_check = extract_arg(sys.argv, '--update-check') + xformers = '--xformers' in sys.argv + deepdanbooru = '--deepdanbooru' in sys.argv + ngrok = '--ngrok' in sys.argv try: commit = run(f"{git} rev-parse HEAD").strip() @@ -180,12 +181,10 @@ def prepare_enviroment(): run_pip(f"install -r {requirements_file}", "requirements for Web UI") - sys.argv += args - - if '--update-check' in args: + if update_check: version_check(commit) - if "--exit" in args: + if "--exit" in sys.argv: print("Exiting because of --exit argument") exit(0) diff --git a/modules/shared.py b/modules/shared.py index 50dc46ae..c0d87168 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -76,7 +76,6 @@ parser.add_argument("--disable-console-progressbars", action='store_true', help= parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False) parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None) parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) -parser.add_argument("--update-check", action='store_true', help="enable http check to confirm that the currently running version is the most recent release.", default=False) cmd_opts = parser.parse_args() restricted_opts = [ -- cgit v1.2.3 From 6021f7a75f7b5208a2be15cda5526028152f922d Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 00:51:36 +0900 Subject: add options to custom hypernetwork layer structure --- .gitignore | 1 + modules/hypernetworks/hypernetwork.py | 88 ++++++++++++++++++++++++++--------- modules/shared.py | 4 +- webui.py | 6 ++- 4 files changed, 75 insertions(+), 24 deletions(-) (limited to 'modules/shared.py') diff --git a/.gitignore b/.gitignore index 69785b3e..4794865c 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ __pycache__ notification.mp3 /SwinIR /textual_inversion +/hypernetwork diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 4905710e..cadb9911 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -1,52 +1,98 @@ +import csv import datetime import glob import html import os import sys import traceback -import tqdm -import csv +import modules.textual_inversion.dataset import torch - -from ldm.util import default -from modules import devices, shared, processing, sd_models -import torch -from torch import einsum +import tqdm from einops import rearrange, repeat -import modules.textual_inversion.dataset +from ldm.util import default +from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler +from torch import einsum + + +def parse_layer_structure(dim, state_dict): + i = 0 + res = [1] + while (key := "linear.{}.weight".format(i)) in state_dict: + weight = state_dict[key] + res.append(len(weight) // dim) + i += 1 + return res class 
HypernetworkModule(torch.nn.Module): multiplier = 1.0 + layer_structure = None + add_layer_norm = False def __init__(self, dim, state_dict=None): super().__init__() + if (state_dict is None or 'linear.0.weight' not in state_dict) and self.layer_structure is None: + layer_structure = (1, 2, 1) + else: + if self.layer_structure is not None: + assert self.layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert self.layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" + layer_structure = self.layer_structure + else: + layer_structure = parse_layer_structure(dim, state_dict) + + linears = [] + for i in range(len(layer_structure) - 1): + linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if self.add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - self.linear1 = torch.nn.Linear(dim, dim * 2) - self.linear2 = torch.nn.Linear(dim * 2, dim) + self.linear = torch.nn.Sequential(*linears) if state_dict is not None: - self.load_state_dict(state_dict, strict=True) + try: + self.load_state_dict(state_dict) + except RuntimeError: + self.try_load_previous(state_dict) else: - - self.linear1.weight.data.normal_(mean=0.0, std=0.01) - self.linear1.bias.data.zero_() - self.linear2.weight.data.normal_(mean=0.0, std=0.01) - self.linear2.bias.data.zero_() + for layer in self.linear: + layer.weight.data.normal_(mean = 0.0, std = 0.01) + layer.bias.data.zero_() self.to(devices.device) + def try_load_previous(self, state_dict): + states = self.state_dict() + states['linear.0.bias'].copy_(state_dict['linear1.bias']) + states['linear.0.weight'].copy_(state_dict['linear1.weight']) + states['linear.1.bias'].copy_(state_dict['linear2.bias']) + states['linear.1.weight'].copy_(state_dict['linear2.weight']) + def forward(self, x): - return x + (self.linear2(self.linear1(x))) * self.multiplier + return x + self.linear(x) * self.multiplier + + def trainables(self): + res = [] + for layer in self.linear: + res += [layer.weight, layer.bias] + return res def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength +def apply_layer_structure(value=None): + HypernetworkModule.layer_structure = value if value is not None else shared.opts.sd_hypernetwork_layer_structure + + +def apply_layer_norm(value=None): + HypernetworkModule.add_layer_norm = value if value is not None else shared.opts.sd_hypernetwork_add_layer_norm + + class Hypernetwork: filename = None name = None @@ -68,7 +114,7 @@ class Hypernetwork: for k, layers in self.layers.items(): for layer in layers: layer.train() - res += [layer.linear1.weight, layer.linear1.bias, layer.linear2.weight, layer.linear2.bias] + res += layer.trainables() return res @@ -226,7 +272,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." 
with torch.autocast("cuda"): ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size) - + assert ds.length > 1, "Dataset should contain more than 1 images" if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) @@ -261,7 +307,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log with torch.autocast("cuda"): c = stack_conds([entry.cond for entry in entries]).to(devices.device) -# c = torch.vstack([entry.cond for entry in entries]).to(devices.device) + c = torch.vstack([entry.cond for entry in entries]).to(devices.device) x = torch.stack([entry.latent for entry in entries]).to(devices.device) loss = shared.sd_model(x, c)[0] del x @@ -283,7 +329,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { "loss": f"{mean_loss:.7f}", - "learn_rate": scheduler.learn_rate + "learn_rate": f"{scheduler.learn_rate:.7f}" }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: diff --git a/modules/shared.py b/modules/shared.py index c0d87168..c87ce70e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -13,7 +13,7 @@ import modules.memmon import modules.sd_models import modules.styles import modules.devices as devices -from modules import sd_samplers, sd_models, localization +from modules import sd_models, sd_samplers, localization from modules.hypernetworks import hypernetwork from modules.paths import models_path, script_path, sd_path @@ -258,6 +258,8 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, refresh=sd_models.list_models), "sd_checkpoint_cache": OptionInfo(0, "Checkpoints to cache in RAM", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}), "sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks), + "sd_hypernetwork_layer_structure": OptionInfo(None, "Hypernetwork layer structure Default: (1,2,1).", gr.Dropdown, lambda: {"choices": [(1, 2, 1), (1, 2, 2, 1), (1, 2, 4, 2, 1)]}), + "sd_hypernetwork_add_layer_norm": OptionInfo(False, "Add layer normalization to hypernetwork architecture."), "sd_hypernetwork_strength": OptionInfo(1.0, "Hypernetwork strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.001}), "img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."), "save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"), diff --git a/webui.py b/webui.py index fe0ce321..86e98ad0 100644 --- a/webui.py +++ b/webui.py @@ -86,11 +86,13 @@ def initialize(): shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model))) shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork))) shared.opts.onchange("sd_hypernetwork_strength", 
modules.hypernetworks.hypernetwork.apply_strength) + shared.opts.onchange("sd_hypernetwork_layer_structure", modules.hypernetworks.hypernetwork.apply_layer_structure) + shared.opts.onchange("sd_hypernetwork_add_layer_norm", modules.hypernetworks.hypernetwork.apply_layer_norm) def webui(): initialize() - + # make the program just exit at ctrl+c without waiting for anything def sigint_handler(sig, frame): print(f'Interrupted with signal {sig} in {frame}') @@ -101,7 +103,7 @@ def webui(): while 1: demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) - + app, local_url, share_url = demo.launch( share=cmd_opts.share, server_name="0.0.0.0" if cmd_opts.listen else None, -- cgit v1.2.3 From 7f2095c6c8db82a5c9cd7c7177f6ba856a2cc676 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 01:01:22 +0900 Subject: update --- modules/shared.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/shared.py') diff --git a/modules/shared.py b/modules/shared.py index c87ce70e..6b6d5c41 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -13,7 +13,7 @@ import modules.memmon import modules.sd_models import modules.styles import modules.devices as devices -from modules import sd_models, sd_samplers, localization +from modules import sd_samplers, sd_models, localization from modules.hypernetworks import hypernetwork from modules.paths import models_path, script_path, sd_path @@ -135,7 +135,7 @@ class State: self.job_no += 1 self.sampling_step = 0 self.current_image_sampling_step = 0 - + def get_job_timestamp(self): return datetime.datetime.now().strftime("%Y%m%d%H%M%S") # shouldn't this return job_timestamp? -- cgit v1.2.3 From 42fbda83bb9830af18187fddb50c1bedd01da502 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 14:30:33 +0000 Subject: layer options moves into create hnet ui --- modules/hypernetworks/hypernetwork.py | 64 +++++++++++++++++------------------ modules/hypernetworks/ui.py | 9 +++-- modules/shared.py | 2 -- modules/ui.py | 8 +++-- webui.py | 8 ++--- 5 files changed, 48 insertions(+), 43 deletions(-) (limited to 'modules/shared.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 583ada31..7d519cd9 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -19,37 +19,21 @@ from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler -def parse_layer_structure(dim, state_dict): - i = 0 - res = [1] - while (key := "linear.{}.weight".format(i)) in state_dict: - weight = state_dict[key] - res.append(len(weight) // dim) - i += 1 - return res - - class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - layer_structure = None - add_layer_norm = False - def __init__(self, dim, state_dict=None): + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): super().__init__() - if (state_dict is None or 'linear.0.weight' not in state_dict) and self.layer_structure is None: - layer_structure = (1, 2, 1) + if layer_structure is not None: + assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" else: - if self.layer_structure is not None: - assert self.layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" - assert self.layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" 
- layer_structure = self.layer_structure - else: - layer_structure = parse_layer_structure(dim, state_dict) + layer_structure = parse_layer_structure(dim, state_dict) linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - if self.add_layer_norm: + if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) self.linear = torch.nn.Sequential(*linears) @@ -77,38 +61,47 @@ class HypernetworkModule(torch.nn.Module): return x + self.linear(x) * self.multiplier def trainables(self): - res = [] + layer_structure = [] for layer in self.linear: - res += [layer.weight, layer.bias] - return res + layer_structure += [layer.weight, layer.bias] + return layer_structure def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength -def apply_layer_structure(value=None): - HypernetworkModule.layer_structure = value if value is not None else shared.opts.sd_hypernetwork_layer_structure +def parse_layer_structure(dim, state_dict): + i = 0 + layer_structure = [1] + while (key := "linear.{}.weight".format(i)) in state_dict: + weight = state_dict[key] + layer_structure.append(len(weight) // dim) + i += 1 -def apply_layer_norm(value=None): - HypernetworkModule.add_layer_norm = value if value is not None else shared.opts.sd_hypernetwork_add_layer_norm + return layer_structure class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False): self.filename = None self.name = name self.layers = {} self.step = 0 self.sd_checkpoint = None self.sd_checkpoint_name = None + self.layer_structure = layer_structure + self.add_layer_norm = add_layer_norm for size in enable_sizes or []: - self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size)) + self.layers[size] = ( + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + ) def weights(self): res = [] @@ -128,6 +121,8 @@ class Hypernetwork: state_dict['step'] = self.step state_dict['name'] = self.name + state_dict['layer_structure'] = self.layer_structure + state_dict['is_layer_norm'] = self.add_layer_norm state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -142,10 +137,15 @@ class Hypernetwork: for size, sd in state_dict.items(): if type(size) == int: - self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1])) + self.layers[size] = ( + HypernetworkModule(size, sd[0], state_dict["layer_structure"], state_dict["is_layer_norm"]), + HypernetworkModule(size, sd[1], state_dict["layer_structure"], state_dict["is_layer_norm"]), + ) self.name = state_dict.get('name', self.name) self.step = state_dict.get('step', 0) + self.layer_structure = state_dict.get('layer_structure', None) + self.add_layer_norm = state_dict.get('is_layer_norm', False) self.sd_checkpoint = state_dict.get('sd_checkpoint', None) self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index dfa599af..7e8ea95e 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -9,11 +9,16 @@ from modules import sd_hijack, shared, devices from modules.hypernetworks import hypernetwork -def 
diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py
index dfa599af..7e8ea95e 100644
--- a/modules/hypernetworks/ui.py
+++ b/modules/hypernetworks/ui.py
@@ -9,11 +9,16 @@ from modules import sd_hijack, shared, devices
 from modules.hypernetworks import hypernetwork
 
 
-def create_hypernetwork(name, enable_sizes):
+def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False):
     fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
     assert not os.path.exists(fn), f"file {fn} already exists"
 
-    hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=[int(x) for x in enable_sizes])
+    hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(
+        name=name,
+        enable_sizes=[int(x) for x in enable_sizes],
+        layer_structure=layer_structure,
+        add_layer_norm=add_layer_norm,
+    )
     hypernet.save(fn)
 
     shared.reload_hypernetworks()
diff --git a/modules/shared.py b/modules/shared.py
index 0540cae9..faede821 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -260,8 +260,6 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
     "sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, refresh=sd_models.list_models),
     "sd_checkpoint_cache": OptionInfo(0, "Checkpoints to cache in RAM", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
     "sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks),
-    "sd_hypernetwork_layer_structure": OptionInfo(None, "Hypernetwork layer structure Default: (1,2,1).", gr.Dropdown, lambda: {"choices": [(1, 2, 1), (1, 2, 2, 1), (1, 2, 4, 2, 1)]}),
-    "sd_hypernetwork_add_layer_norm": OptionInfo(False, "Add layer normalization to hypernetwork architecture."),
     "sd_hypernetwork_strength": OptionInfo(1.0, "Hypernetwork strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.001}),
     "img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."),
     "save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"),
diff --git a/modules/ui.py b/modules/ui.py
index ca46343f..d9ee462f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -458,14 +458,14 @@ def create_toprow(is_img2img):
         with gr.Row():
             with gr.Column(scale=80):
                 with gr.Row():
-                    prompt = gr.Textbox(label="Prompt", elem_id=f"{id_part}_prompt", show_label=False, lines=2, 
+                    prompt = gr.Textbox(label="Prompt", elem_id=f"{id_part}_prompt", show_label=False, lines=2,
                         placeholder="Prompt (press Ctrl+Enter or Alt+Enter to generate)"
                     )
 
     with gr.Row():
         with gr.Column(scale=80):
             with gr.Row():
-                negative_prompt = gr.Textbox(label="Negative prompt", elem_id=f"{id_part}_neg_prompt", show_label=False, lines=2, 
+                negative_prompt = gr.Textbox(label="Negative prompt", elem_id=f"{id_part}_neg_prompt", show_label=False, lines=2,
                     placeholder="Negative prompt (press Ctrl+Enter or Alt+Enter to generate)"
                 )
 
@@ -1198,6 +1198,8 @@ def create_ui(wrap_gradio_gpu_call):
             with gr.Tab(label="Create hypernetwork"):
                 new_hypernetwork_name = gr.Textbox(label="Name")
                 new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
+                new_hypernetwork_layer_structure = gr.Dropdown(label="Hypernetwork layer structure", choices=[(1, 2, 1), (1, 2, 2, 1), (1, 2, 4, 2, 1)])
+                new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization")
 
                 with gr.Row():
                     with gr.Column(scale=3):
@@ -1280,6 +1282,8 @@ def create_ui(wrap_gradio_gpu_call):
             inputs=[
                 new_hypernetwork_name,
                 new_hypernetwork_sizes,
+                new_hypernetwork_layer_structure,
+                new_hypernetwork_add_layer_norm,
             ],
             outputs=[
                 train_hypernetwork_name,
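
The UI wiring above simply forwards the two new controls into `create_hypernetwork`, which passes them on to the `Hypernetwork` constructor. A hedged usage sketch, assuming the webui modules are importable; the name and values are made-up examples, only the signature follows the patch:

# Illustrative only; argument values are hypothetical.
from modules.hypernetworks import ui as hypernetworks_ui

hypernetworks_ui.create_hypernetwork(
    name="example_hypernet",                     # hypothetical name
    enable_sizes=["768", "320", "640", "1280"],  # converted to ints inside create_hypernetwork
    layer_structure=(1, 2, 2, 1),                # must start and end with 1, per the asserts in HypernetworkModule
    add_layer_norm=True,
)
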
diff --git a/webui.py b/webui.py
index c7260c7a..177bef74 100644
--- a/webui.py
+++ b/webui.py
@@ -85,9 +85,7 @@ def initialize():
     shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model)))
     shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork)))
     shared.opts.onchange("sd_hypernetwork_strength", modules.hypernetworks.hypernetwork.apply_strength)
-    shared.opts.onchange("sd_hypernetwork_layer_structure", modules.hypernetworks.hypernetwork.apply_layer_structure)
-    shared.opts.onchange("sd_hypernetwork_add_layer_norm", modules.hypernetworks.hypernetwork.apply_layer_norm)
-    
+
     # make the program just exit at ctrl+c without waiting for anything
     def sigint_handler(sig, frame):
         print(f'Interrupted with signal {sig} in {frame}')
@@ -142,7 +140,7 @@ def webui(launch_api=False):
             create_api(app)
 
         wait_on_server(demo)
-        
+
         sd_samplers.set_samplers()
 
         print('Reloading Custom Scripts')
@@ -160,4 +158,4 @@ if __name__ == "__main__":
     if cmd_opts.nowebui:
         api_only()
     else:
-        webui(cmd_opts.api)
\ No newline at end of file
+        webui(cmd_opts.api)
-- 
cgit v1.2.3
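
A side effect of the hypernetwork.py changes in this commit is that the architecture now travels with the checkpoint: `Hypernetwork.save` writes `layer_structure` and `is_layer_norm` into the `.pt` file and `load` reads them back, so the removed global options (and the onchange hooks dropped above) are no longer needed. A minimal sketch of that round trip, using only the keys visible in the patch; the file name and values are hypothetical:

import torch

# Sketch of the persistence format implied by the patch; not the full Hypernetwork class.
state_dict = {
    "step": 0,
    "name": "example_hypernet",       # hypothetical
    "layer_structure": (1, 2, 2, 1),
    "is_layer_norm": True,            # saved as 'is_layer_norm', exposed as add_layer_norm on load
    "sd_checkpoint": None,
    "sd_checkpoint_name": None,
}
torch.save(state_dict, "example_hypernet.pt")

loaded = torch.load("example_hypernet.pt")
print(loaded.get("layer_structure", None), loaded.get("is_layer_norm", False))
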