From bb57f30c2de46cfca5419ad01738a41705f96cc3 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Fri, 14 Oct 2022 10:56:41 +0200 Subject: init --- modules/ui.py | 59 +++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 20 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 220fb80b..d961d126 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -24,7 +24,8 @@ import gradio.routes from modules import sd_hijack from modules.paths import script_path -from modules.shared import opts, cmd_opts +from modules.shared import opts, cmd_opts,aesthetic_embeddings + if cmd_opts.deepdanbooru: from modules.deepbooru import get_deepbooru_tags import modules.shared as shared @@ -534,6 +535,14 @@ def create_ui(wrap_gradio_gpu_call): width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + with gr.Group(): + aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005") + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.7) + aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=50) + + aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) + aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) + with gr.Row(): restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) tiling = gr.Checkbox(label='Tiling', value=False) @@ -586,25 +595,30 @@ def create_ui(wrap_gradio_gpu_call): fn=wrap_gradio_gpu_call(modules.txt2img.txt2img), _js="submit", inputs=[ - txt2img_prompt, - txt2img_negative_prompt, - txt2img_prompt_style, - txt2img_prompt_style2, - steps, - sampler_index, - restore_faces, - tiling, - batch_count, - batch_size, - cfg_scale, - seed, - subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox, - height, - width, - enable_hr, - scale_latent, - denoising_strength, - ] + custom_inputs, + txt2img_prompt, + txt2img_negative_prompt, + txt2img_prompt_style, + txt2img_prompt_style2, + steps, + sampler_index, + restore_faces, + tiling, + batch_count, + batch_size, + cfg_scale, + seed, + subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox, + height, + width, + enable_hr, + scale_latent, + denoising_strength, + aesthetic_lr, + aesthetic_weight, + aesthetic_steps, + aesthetic_imgs, + aesthetic_slerp + ] + custom_inputs, outputs=[ txt2img_gallery, generation_info, @@ -1097,6 +1111,9 @@ def create_ui(wrap_gradio_gpu_call): template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt")) training_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) training_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + batch_size = gr.Slider(minimum=1, maximum=64, step=1, label="Batch Size", value=4) + gradient_accumulation = gr.Slider(minimum=1, maximum=256, step=1, label="Gradient accumulation", + value=1) steps = gr.Number(label='Max steps', value=100000, precision=0) create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0) save_embedding_every = gr.Number(label='Save a copy of embedding to log directory 
every N steps, 0 to disable', value=500, precision=0) @@ -1180,6 +1197,8 @@ def create_ui(wrap_gradio_gpu_call): template_file, save_image_with_stored_embedding, preview_image_prompt, + batch_size, + gradient_accumulation ], outputs=[ ti_output, -- cgit v1.2.3 From 37d7ffb415cd8c69b3c0bb5f61844dde0b169f78 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sat, 15 Oct 2022 15:59:37 +0200 Subject: fix to tokens lenght, addend embs generator, add new features to edit the embedding before the generation using text --- modules/aesthetic_clip.py | 78 ++++++++++++++++++++++++ modules/processing.py | 148 +++++++++++++++++++++++++++++++--------------- modules/sd_hijack.py | 111 ++++++++++++++++++++++------------ modules/shared.py | 4 ++ modules/txt2img.py | 10 +++- modules/ui.py | 47 ++++++++++++--- 6 files changed, 302 insertions(+), 96 deletions(-) create mode 100644 modules/aesthetic_clip.py (limited to 'modules/ui.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py new file mode 100644 index 00000000..f15cfd47 --- /dev/null +++ b/modules/aesthetic_clip.py @@ -0,0 +1,78 @@ +import itertools +import os +from pathlib import Path +import html +import gc + +import gradio as gr +import torch +from PIL import Image +from modules import shared +from modules.shared import device, aesthetic_embeddings +from transformers import CLIPModel, CLIPProcessor + +from tqdm.auto import tqdm + + +def get_all_images_in_folder(folder): + return [os.path.join(folder, f) for f in os.listdir(folder) if + os.path.isfile(os.path.join(folder, f)) and check_is_valid_image_file(f)] + + +def check_is_valid_image_file(filename): + return filename.lower().endswith(('.png', '.jpg', '.jpeg')) + + +def batched(dataset, total, n=1): + for ndx in range(0, total, n): + yield [dataset.__getitem__(i) for i in range(ndx, min(ndx + n, total))] + + +def iter_to_batched(iterable, n=1): + it = iter(iterable) + while True: + chunk = tuple(itertools.islice(it, n)) + if not chunk: + return + yield chunk + + +def generate_imgs_embd(name, folder, batch_size): + # clipModel = CLIPModel.from_pretrained( + # shared.sd_model.cond_stage_model.clipModel.name_or_path + # ) + model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path).to(device) + processor = CLIPProcessor.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path) + + with torch.no_grad(): + embs = [] + for paths in tqdm(iter_to_batched(get_all_images_in_folder(folder), batch_size), + desc=f"Generating embeddings for {name}"): + if shared.state.interrupted: + break + inputs = processor(images=[Image.open(path) for path in paths], return_tensors="pt").to(device) + outputs = model.get_image_features(**inputs).cpu() + embs.append(torch.clone(outputs)) + inputs.to("cpu") + del inputs, outputs + + embs = torch.cat(embs, dim=0).mean(dim=0, keepdim=True) + + # The generated embedding will be located here + path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt") + torch.save(embs, path) + + model = model.cpu() + del model + del processor + del embs + gc.collect() + torch.cuda.empty_cache() + res = f""" + Done generating embedding for {name}! 
+ Hypernetwork saved to {html.escape(path)} + """ + shared.update_aesthetic_embeddings() + return gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", + value=sorted(aesthetic_embeddings.keys())[0] if len( + aesthetic_embeddings) > 0 else None), res, "" diff --git a/modules/processing.py b/modules/processing.py index 9a033759..ab68d63a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -20,7 +20,6 @@ import modules.images as images import modules.styles import logging - # some of those options should not be changed at all because they would break the model, so I removed them from options. opt_C = 4 opt_f = 8 @@ -52,8 +51,13 @@ def get_correct_sampler(p): elif isinstance(p, modules.processing.StableDiffusionProcessingImg2Img): return sd_samplers.samplers_for_img2img + class StableDiffusionProcessing: - def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None): + def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, + subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, + sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, + restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, + extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None): self.sd_model = sd_model self.outpath_samples: str = outpath_samples self.outpath_grids: str = outpath_grids @@ -104,7 +108,8 @@ class StableDiffusionProcessing: class Processed: - def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None): + def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, + all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None): self.images = images_list self.prompt = p.prompt self.negative_prompt = p.negative_prompt @@ -141,7 +146,8 @@ class Processed: self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0] self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0] self.seed = int(self.seed if type(self.seed) != list else self.seed[0]) - self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 + self.subseed = int( + self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 self.all_prompts = all_prompts or [self.prompt] self.all_seeds = all_seeds or [self.seed] @@ -181,39 +187,43 @@ class Processed: return json.dumps(obj) - def infotext(self, p: StableDiffusionProcessing, index): - return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size) + def infotext(self, p: StableDiffusionProcessing, index): + return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], + 
position_in_batch=index % self.batch_size, iteration=index // self.batch_size) # from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3 def slerp(val, low, high): - low_norm = low/torch.norm(low, dim=1, keepdim=True) - high_norm = high/torch.norm(high, dim=1, keepdim=True) - dot = (low_norm*high_norm).sum(1) + low_norm = low / torch.norm(low, dim=1, keepdim=True) + high_norm = high / torch.norm(high, dim=1, keepdim=True) + dot = (low_norm * high_norm).sum(1) if dot.mean() > 0.9995: return low * val + high * (1 - val) omega = torch.acos(dot) so = torch.sin(omega) - res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high + res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high return res -def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None): +def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, + p=None): xs = [] # if we have multiple seeds, this means we are working with batch size>1; this then # enables the generation of additional tensors with noise that the sampler will use during its processing. # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to # produce the same images as with two batches [100], [101]. - if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0): + if p is not None and p.sampler is not None and ( + len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0): sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))] else: sampler_noises = None for i, seed in enumerate(seeds): - noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8) + noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else ( + shape[0], seed_resize_from_h // 8, seed_resize_from_w // 8) subnoise = None if subseeds is not None: @@ -241,7 +251,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see dx = max(-dx, 0) dy = max(-dy, 0) - x[:, ty:ty+h, tx:tx+w] = noise[:, dy:dy+h, dx:dx+w] + x[:, ty:ty + h, tx:tx + w] = noise[:, dy:dy + h, dx:dx + w] noise = x if sampler_noises is not None: @@ -293,14 +303,20 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration "Seed": all_seeds[index], "Face restoration": (opts.face_restoration_model if p.restore_faces else None), "Size": f"{p.width}x{p.height}", - "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), - "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')), - "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace(':', '')), + "Model hash": getattr(p, 'sd_model_hash', + None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash), + "Model": ( + None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace( + ',', 
'').replace(':', '')), + "Hypernet": ( + None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace( + ':', '')), "Batch size": (None if p.batch_size < 2 else p.batch_size), "Batch pos": (None if p.batch_size < 2 else position_in_batch), "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength), - "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), + "Seed resize from": ( + None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), "Denoising strength": getattr(p, 'denoising_strength', None), "Eta": (None if p.sampler is None or p.sampler.eta == p.sampler.default_eta else p.sampler.eta), "Clip skip": None if clip_skip <= 1 else clip_skip, @@ -309,7 +325,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration generation_params.update(p.extra_generation_params) - generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None]) + generation_params_text = ", ".join( + [k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None]) negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else "" @@ -317,7 +334,9 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, - aesthetic_imgs=None,aesthetic_slerp=False) -> Processed: + aesthetic_imgs=None, aesthetic_slerp=False, aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" aesthetic_lr = float(aesthetic_lr) @@ -385,7 +404,7 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh for n in range(p.n_iter): if state.skipped: state.skipped = False - + if state.interrupted: break @@ -396,16 +415,19 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh if (len(prompts) == 0): break - #uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) - #c = p.sd_model.get_learned_conditioning(prompts) + # uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) + # c = p.sd_model.get_learned_conditioning(prompts) with devices.autocast(): if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): - shared.sd_model.cond_stage_model.set_aesthetic_params(0, 0, 0) + shared.sd_model.cond_stage_model.set_aesthetic_params() uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps) if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): shared.sd_model.cond_stage_model.set_aesthetic_params(aesthetic_lr, aesthetic_weight, - aesthetic_steps, aesthetic_imgs,aesthetic_slerp) + aesthetic_steps, aesthetic_imgs, + aesthetic_slerp, aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative) c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps) if len(model_hijack.comments) > 0: @@ -413,13 +435,13 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh comments[comment] = 1 if p.n_iter > 1: - 
shared.state.job = f"Batch {n+1} out of {p.n_iter}" + shared.state.job = f"Batch {n + 1} out of {p.n_iter}" with devices.autocast(): - samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength) + samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, + subseed_strength=p.subseed_strength) if state.interrupted or state.skipped: - # if we are interrupted, sample returns just noise # use the image collected previously in sampler loop samples_ddim = shared.state.current_latent @@ -445,7 +467,9 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh if p.restore_faces: if opts.save and not p.do_not_save_samples and opts.save_images_before_face_restoration: - images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration") + images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], + opts.samples_format, info=infotext(n, i), p=p, + suffix="-before-face-restoration") devices.torch_gc() @@ -456,7 +480,8 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh if p.color_corrections is not None and i < len(p.color_corrections): if opts.save and not p.do_not_save_samples and opts.save_images_before_color_correction: - images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-color-correction") + images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, + info=infotext(n, i), p=p, suffix="-before-color-correction") image = apply_color_correction(p.color_corrections[i], image) if p.overlay_images is not None and i < len(p.overlay_images): @@ -474,7 +499,8 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh image = image.convert('RGB') if opts.samples_save and not p.do_not_save_samples: - images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p) + images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, + info=infotext(n, i), p=p) text = infotext(n, i) infotexts.append(text) @@ -482,7 +508,7 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh image.info["parameters"] = text output_images.append(image) - del x_samples_ddim + del x_samples_ddim devices.torch_gc() @@ -504,10 +530,13 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh index_of_first_image = 1 if opts.grid_save: - images.save_image(grid, p.outpath_grids, "grid", all_seeds[0], all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True) + images.save_image(grid, p.outpath_grids, "grid", all_seeds[0], all_prompts[0], opts.grid_format, + info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True) devices.torch_gc() - return Processed(p, output_images, all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), subseed=all_subseeds[0], all_prompts=all_prompts, all_seeds=all_seeds, all_subseeds=all_subseeds, index_of_first_image=index_of_first_image, infotexts=infotexts) + return Processed(p, output_images, all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), + subseed=all_subseeds[0], all_prompts=all_prompts, all_seeds=all_seeds, 
all_subseeds=all_subseeds, + index_of_first_image=index_of_first_image, infotexts=infotexts) class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): @@ -543,25 +572,34 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) if not self.enable_hr: - x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, + subseeds=subseeds, subseed_strength=self.subseed_strength, + seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) return samples - x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, + subseeds=subseeds, subseed_strength=self.subseed_strength, + seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) truncate_x = (self.firstphase_width - self.firstphase_width_truncated) // opt_f truncate_y = (self.firstphase_height - self.firstphase_height_truncated) // opt_f - samples = samples[:, :, truncate_y//2:samples.shape[2]-truncate_y//2, truncate_x//2:samples.shape[3]-truncate_x//2] + samples = samples[:, :, truncate_y // 2:samples.shape[2] - truncate_y // 2, + truncate_x // 2:samples.shape[3] - truncate_x // 2] if self.scale_latent: - samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), + mode="bilinear") else: decoded_samples = decode_first_stage(self.sd_model, samples) if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None": - decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear") + decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), + mode="bilinear") else: lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) @@ -585,13 +623,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) - noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, + subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) # GC now before running the next img2img to prevent running out of memory x = None devices.torch_gc() - samples = self.sampler.sample_img2img(self, samples, noise, 
conditioning, unconditional_conditioning, steps=self.steps) + samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, + steps=self.steps) return samples @@ -599,7 +640,9 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): sampler = None - def __init__(self, init_images=None, resize_mode=0, denoising_strength=0.75, mask=None, mask_blur=4, inpainting_fill=0, inpaint_full_res=True, inpaint_full_res_padding=0, inpainting_mask_invert=0, **kwargs): + def __init__(self, init_images=None, resize_mode=0, denoising_strength=0.75, mask=None, mask_blur=4, + inpainting_fill=0, inpaint_full_res=True, inpaint_full_res_padding=0, inpainting_mask_invert=0, + **kwargs): super().__init__(**kwargs) self.init_images = init_images @@ -607,7 +650,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.denoising_strength: float = denoising_strength self.init_latent = None self.image_mask = mask - #self.image_unblurred_mask = None + # self.image_unblurred_mask = None self.latent_mask = None self.mask_for_overlay = None self.mask_blur = mask_blur @@ -619,7 +662,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): self.nmask = None def init(self, all_prompts, all_seeds, all_subseeds): - self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model) + self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, + self.sd_model) crop_region = None if self.image_mask is not None: @@ -628,7 +672,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.inpainting_mask_invert: self.image_mask = ImageOps.invert(self.image_mask) - #self.image_unblurred_mask = self.image_mask + # self.image_unblurred_mask = self.image_mask if self.mask_blur > 0: self.image_mask = self.image_mask.filter(ImageFilter.GaussianBlur(self.mask_blur)) @@ -642,7 +686,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): mask = mask.crop(crop_region) self.image_mask = images.resize_image(2, mask, self.width, self.height) - self.paste_to = (x1, y1, x2-x1, y2-y1) + self.paste_to = (x1, y1, x2 - x1, y2 - y1) else: self.image_mask = images.resize_image(self.resize_mode, self.image_mask, self.width, self.height) np_mask = np.array(self.image_mask) @@ -665,7 +709,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): if self.image_mask is not None: image_masked = Image.new('RGBa', (image.width, image.height)) - image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + image_masked.paste(image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) self.overlay_images.append(image_masked.convert('RGBA')) @@ -714,12 +759,17 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): # this needs to be fixed to be done in sample() using actual seeds for batches if self.inpainting_fill == 2: - self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask + self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], + all_seeds[ + 0:self.init_latent.shape[ + 0]]) * self.nmask elif self.inpainting_fill == 3: self.init_latent = self.init_latent * self.mask def sample(self, conditioning, 
unconditional_conditioning, seeds, subseeds, subseed_strength): - x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, + subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, + seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 6d5196fe..192883b2 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -14,7 +14,8 @@ from modules.sd_hijack_optimizations import invokeAI_mps_available import ldm.modules.attention import ldm.modules.diffusionmodules.model -from transformers import CLIPVisionModel, CLIPModel +from tqdm import trange +from transformers import CLIPVisionModel, CLIPModel, CLIPTokenizer import torch.optim as optim import copy @@ -22,21 +23,25 @@ attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward + def apply_optimizations(): undo_optimizations() ldm.modules.diffusionmodules.model.nonlinearity = silu - if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (8, 6)): + if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and ( + 6, 0) <= torch.cuda.get_device_capability(shared.device) <= (8, 6)): print("Applying xformers cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward elif cmd_opts.opt_split_attention_v1: print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 - elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): + elif not cmd_opts.disable_opt_split_attention and ( + cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): if not invokeAI_mps_available and shared.device.type == 'mps': - print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") + print( + "The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 else: @@ -112,14 +117,16 @@ class StableDiffusionModelHijack: _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text]) return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count) + def slerp(low, high, val): - low_norm = low/torch.norm(low, dim=1, keepdim=True) - high_norm = high/torch.norm(high, dim=1, keepdim=True) - omega = torch.acos((low_norm*high_norm).sum(1)) + low_norm = low / torch.norm(low, dim=1, keepdim=True) + 
high_norm = high / torch.norm(high, dim=1, keepdim=True) + omega = torch.acos((low_norm * high_norm).sum(1)) so = torch.sin(omega) - res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high + res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high return res + class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): def __init__(self, wrapped, hijack): super().__init__() @@ -128,6 +135,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): self.wrapped.transformer.name_or_path ) del self.clipModel.vision_model + self.tokenizer = CLIPTokenizer.from_pretrained(self.wrapped.transformer.name_or_path) self.hijack: StableDiffusionModelHijack = hijack self.tokenizer = wrapped.tokenizer # self.vision = CLIPVisionModel.from_pretrained(self.wrapped.transformer.name_or_path).eval() @@ -139,7 +147,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0] - tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k] + tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if + '(' in k or ')' in k or '[' in k or ']' in k] for text, ident in tokens_with_parens: mult = 1.0 for c in text: @@ -155,8 +164,13 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if mult != 1.0: self.token_mults[ident] = mult - def set_aesthetic_params(self, aesthetic_lr, aesthetic_weight, aesthetic_steps, image_embs_name=None, - aesthetic_slerp=True): + def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, + aesthetic_slerp=True, aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False): + self.aesthetic_imgs_text = aesthetic_imgs_text + self.aesthetic_slerp_angle = aesthetic_slerp_angle + self.aesthetic_text_negative = aesthetic_text_negative self.slerp = aesthetic_slerp self.aesthetic_lr = aesthetic_lr self.aesthetic_weight = aesthetic_weight @@ -180,7 +194,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): else: parsed = [[line, 1.0]] - tokenized = self.wrapped.tokenizer([text for text, _ in parsed], truncation=False, add_special_tokens=False)["input_ids"] + tokenized = self.wrapped.tokenizer([text for text, _ in parsed], truncation=False, add_special_tokens=False)[ + "input_ids"] fixes = [] remade_tokens = [] @@ -196,18 +211,20 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if token == self.comma_token: last_comma = len(remade_tokens) - elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), 1) % 75 == 0 and last_comma != -1 and len(remade_tokens) - last_comma <= opts.comma_padding_backtrack: + elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), + 1) % 75 == 0 and last_comma != -1 and len( + remade_tokens) - last_comma <= opts.comma_padding_backtrack: last_comma += 1 reloc_tokens = remade_tokens[last_comma:] reloc_mults = multipliers[last_comma:] remade_tokens = remade_tokens[:last_comma] length = len(remade_tokens) - + rem = int(math.ceil(length / 75)) * 75 - length remade_tokens += [id_end] * rem + reloc_tokens multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults - + if embedding is None: remade_tokens.append(token) multipliers.append(weight) @@ -248,7 +265,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if line in cache: remade_tokens, fixes, multipliers = 
cache[line] else: - remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments) + remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, + hijack_comments) token_count = max(current_token_count, token_count) cache[line] = (remade_tokens, fixes, multipliers) @@ -259,7 +277,6 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count - def process_text_old(self, text): id_start = self.wrapped.tokenizer.bos_token_id id_end = self.wrapped.tokenizer.eos_token_id @@ -289,7 +306,8 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): while i < len(tokens): token = tokens[i] - embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i) + embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, + i) mult_change = self.token_mults.get(token) if opts.enable_emphasis else None if mult_change is not None: @@ -312,11 +330,12 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): ovf = remade_tokens[maxlen - 2:] overflowing_words = [vocab.get(int(x), "") for x in ovf] overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words)) - hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n") + hijack_comments.append( + f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n") token_count = len(remade_tokens) remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens)) - remade_tokens = [id_start] + remade_tokens[0:maxlen-2] + [id_end] + remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end] cache[tuple_tokens] = (remade_tokens, fixes, multipliers) multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers)) @@ -326,23 +345,26 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): hijack_fixes.append(fixes) batch_multipliers.append(multipliers) return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count - + def forward(self, text): use_old = opts.use_old_emphasis_implementation if use_old: - batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text) + batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old( + text) else: - batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text(text) + batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text( + text) self.hijack.comments += hijack_comments if len(used_custom_terms) > 0: - self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms])) - + self.hijack.comments.append( + "Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms])) + if use_old: self.hijack.fixes = hijack_fixes return self.process_tokens(remade_batch_tokens, batch_multipliers) - + z = None i = 0 while max(map(len, remade_batch_tokens)) != 0: @@ -356,7 +378,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if fix[0] == i: fixes.append(fix[1]) 
self.hijack.fixes.append(fixes) - + tokens = [] multipliers = [] for j in range(len(remade_batch_tokens)): @@ -378,19 +400,30 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): remade_batch_tokens] tokens = torch.asarray(remade_batch_tokens).to(device) + + model = copy.deepcopy(self.clipModel).to(device) + model.requires_grad_(True) + if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: + text_embs_2 = model.get_text_features( + **self.tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device)) + if self.aesthetic_text_negative: + text_embs_2 = self.image_embs - text_embs_2 + text_embs_2 /= text_embs_2.norm(dim=-1, keepdim=True) + img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle) + else: + img_embs = self.image_embs + with torch.enable_grad(): - model = copy.deepcopy(self.clipModel).to(device) - model.requires_grad_(True) # We optimize the model to maximize the similarity optimizer = optim.Adam( model.text_model.parameters(), lr=self.aesthetic_lr ) - for i in range(self.aesthetic_steps): + for i in trange(self.aesthetic_steps, desc="Aesthetic optimization"): text_embs = model.get_text_features(input_ids=tokens) text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True) - sim = text_embs @ self.image_embs.T + sim = text_embs @ img_embs.T loss = -sim optimizer.zero_grad() loss.mean().backward() @@ -405,6 +438,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): model.cpu() del model + zn = torch.concat([zn for i in range(z.shape[1] // 77)], 1) if self.slerp: z = slerp(z, zn, self.aesthetic_weight) else: @@ -413,15 +447,16 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): remade_batch_tokens = rem_tokens batch_multipliers = rem_multipliers i += 1 - + return z - - + def process_tokens(self, remade_batch_tokens, batch_multipliers): if not opts.use_old_emphasis_implementation: - remade_batch_tokens = [[self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in remade_batch_tokens] + remade_batch_tokens = [ + [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in + remade_batch_tokens] batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers] - + tokens = torch.asarray(remade_batch_tokens).to(device) outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) @@ -461,8 +496,8 @@ class EmbeddingsWithFixes(torch.nn.Module): for fixes, tensor in zip(batch_fixes, inputs_embeds): for offset, embedding in fixes: emb = embedding.vec - emb_len = min(tensor.shape[0]-offset-1, emb.shape[0]) - tensor = torch.cat([tensor[0:offset+1], emb[0:emb_len], tensor[offset+1+emb_len:]]) + emb_len = min(tensor.shape[0] - offset - 1, emb.shape[0]) + tensor = torch.cat([tensor[0:offset + 1], emb[0:emb_len], tensor[offset + 1 + emb_len:]]) vecs.append(tensor) diff --git a/modules/shared.py b/modules/shared.py index cf13a10d..7cd608ca 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -95,6 +95,10 @@ loaded_hypernetwork = None aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} +def update_aesthetic_embeddings(): + global aesthetic_embeddings + aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in + os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} def reload_hypernetworks(): global 
hypernetworks diff --git a/modules/txt2img.py b/modules/txt2img.py index 78342024..eedcdfe0 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -13,7 +13,11 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, aesthetic_imgs=None, - aesthetic_slerp=False, *args): + aesthetic_slerp=False, + aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False, + *args): p = StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples, @@ -47,7 +51,9 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: processed = modules.scripts.scripts_txt2img.run(p, *args) if processed is None: - processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp) + processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp,aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative) shared.total_tqdm.clear() diff --git a/modules/ui.py b/modules/ui.py index d961d126..e98e2113 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -41,6 +41,7 @@ from modules import prompt_parser from modules.images import save_image import modules.textual_inversion.ui import modules.hypernetworks.ui +import modules.aesthetic_clip # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() @@ -449,7 +450,7 @@ def create_toprow(is_img2img): with gr.Row(): negative_prompt = gr.Textbox(label="Negative prompt", elem_id="negative_prompt", show_label=False, placeholder="Negative prompt", lines=2) with gr.Column(scale=1, elem_id="roll_col"): - sh = gr.Button(elem_id="sh", visible=True) + sh = gr.Button(elem_id="sh", visible=True) with gr.Column(scale=1, elem_id="style_neg_col"): prompt_style2 = gr.Dropdown(label="Style 2", elem_id=f"{id_part}_style2_index", choices=[k for k, v in shared.prompt_styles.styles.items()], value=next(iter(shared.prompt_styles.styles.keys())), visible=len(shared.prompt_styles.styles) > 1) @@ -536,9 +537,13 @@ def create_ui(wrap_gradio_gpu_call): height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) with gr.Group(): - aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005") - aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.7) - aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=50) + aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.0001") + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) + aesthetic_steps = gr.Slider(minimum=0, maximum=256, step=1, label="Aesthetic steps", value=5) + with gr.Row(): + aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") + aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) + aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", 
value=False) @@ -617,7 +622,10 @@ def create_ui(wrap_gradio_gpu_call): aesthetic_weight, aesthetic_steps, aesthetic_imgs, - aesthetic_slerp + aesthetic_slerp, + aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative ] + custom_inputs, outputs=[ txt2img_gallery, @@ -721,7 +729,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution', value=False) - inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=256, step=4, value=32) + inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=1024, step=4, value=32) with gr.TabItem('Batch img2img', id='batch'): hidden = '
Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' @@ -1071,6 +1079,17 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_embedding = gr.Button(value="Create embedding", variant='primary') + with gr.Tab(label="Create images embedding"): + new_embedding_name_ae = gr.Textbox(label="Name") + process_src_ae = gr.Textbox(label='Source directory') + batch_ae = gr.Slider(minimum=1, maximum=1024, step=1, label="Batch size", value=256) + with gr.Row(): + with gr.Column(scale=3): + gr.HTML(value="") + + with gr.Column(): + create_embedding_ae = gr.Button(value="Create images embedding", variant='primary') + with gr.Tab(label="Create hypernetwork"): new_hypernetwork_name = gr.Textbox(label="Name") new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) @@ -1139,7 +1158,7 @@ def create_ui(wrap_gradio_gpu_call): fn=modules.textual_inversion.ui.create_embedding, inputs=[ new_embedding_name, - initialization_text, + process_src, nvpt, ], outputs=[ @@ -1149,6 +1168,20 @@ def create_ui(wrap_gradio_gpu_call): ] ) + create_embedding_ae.click( + fn=modules.aesthetic_clip.generate_imgs_embd, + inputs=[ + new_embedding_name_ae, + process_src_ae, + batch_ae + ], + outputs=[ + aesthetic_imgs, + ti_output, + ti_outcome, + ] + ) + create_hypernetwork.click( fn=modules.hypernetworks.ui.create_hypernetwork, inputs=[ -- cgit v1.2.3 From 4387e4fe6479c08f7bc7e42924c3a1093e3a1872 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sat, 15 Oct 2022 18:39:29 +0200 Subject: Update modules/ui.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Víctor Gallego --- modules/ui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index d0696101..5bb961b2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -599,7 +599,8 @@ def create_ui(wrap_gradio_gpu_call): with gr.Group(): aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.0001") aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) - aesthetic_steps = gr.Slider(minimum=0, maximum=256, step=1, label="Aesthetic steps", value=5) + aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + with gr.Row(): aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) -- cgit v1.2.3 From 0d4f5db235357aeb4c7a8738179ba33aaf5a6b75 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sat, 15 Oct 2022 18:40:58 +0200 Subject: Update modules/ui.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Víctor Gallego --- modules/ui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 5bb961b2..25eba548 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -597,7 +597,8 @@ def create_ui(wrap_gradio_gpu_call): height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) with gr.Group(): - aesthetic_lr = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.0001") + aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", 
value="0.0001") + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) -- cgit v1.2.3 From ad9bc604a8fadcfebe72be37f66cec51e7e87fb5 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sat, 15 Oct 2022 18:41:18 +0200 Subject: Update modules/ui.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Víctor Gallego --- modules/ui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 25eba548..3b28b69c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -607,7 +607,8 @@ def create_ui(wrap_gradio_gpu_call): aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) - aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) + aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Aesthetic imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) + aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) with gr.Row(): -- cgit v1.2.3 From 3f5c3b981e46c16bb10948d012575b25170efb3b Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sat, 15 Oct 2022 18:41:46 +0200 Subject: Update modules/ui.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Víctor Gallego --- modules/ui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 3b28b69c..1f6fcdc9 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1190,7 +1190,8 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_embedding = gr.Button(value="Create embedding", variant='primary') - with gr.Tab(label="Create images embedding"): + with gr.Tab(label="Create aesthetic images embedding"): + new_embedding_name_ae = gr.Textbox(label="Name") process_src_ae = gr.Textbox(label='Source directory') batch_ae = gr.Slider(minimum=1, maximum=1024, step=1, label="Batch size", value=256) -- cgit v1.2.3 From 523140d7805c644700009b8a2483ff4eb4a22304 Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sun, 16 Oct 2022 10:23:30 +0200 Subject: ui fix --- modules/aesthetic_clip.py | 3 +-- modules/sd_hijack.py | 3 +-- modules/shared.py | 2 ++ modules/ui.py | 24 ++++++++++++++---------- 4 files changed, 18 insertions(+), 14 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py index 68264284..ccb35c73 100644 --- a/modules/aesthetic_clip.py +++ b/modules/aesthetic_clip.py @@ -74,5 +74,4 @@ def generate_imgs_embd(name, folder, batch_size): """ shared.update_aesthetic_embeddings() return gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), label="Imgs embedding", - value=sorted(shared.aesthetic_embeddings.keys())[0] if len( - shared.aesthetic_embeddings) > 0 else None), res, "" + value="None"), res, "" diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 01fcb78f..2de2eed5 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -392,8 +392,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): z1 = self.process_tokens(tokens, multipliers) z = z1 if z is None else 
torch.cat((z, z1), axis=-2) - if len(text[ - 0]) != 0 and self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name != None: + if self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name != None: if not opts.use_old_emphasis_implementation: remade_batch_tokens = [ [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in diff --git a/modules/shared.py b/modules/shared.py index 3c5ffef1..e2c98b2d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -96,11 +96,13 @@ loaded_hypernetwork = None aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} +aesthetic_embeddings = aesthetic_embeddings | {"None": None} def update_aesthetic_embeddings(): global aesthetic_embeddings aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} + aesthetic_embeddings = aesthetic_embeddings | {"None": None} def reload_hypernetworks(): global hypernetworks diff --git a/modules/ui.py b/modules/ui.py index 13ba3142..4069f0d2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -594,19 +594,23 @@ def create_ui(wrap_gradio_gpu_call): height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) with gr.Group(): - aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") - - aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) - aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + with gr.Accordion("Open for Clip Aesthetic!",open=False): + with gr.Row(): + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) + aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) - with gr.Row(): - aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") - aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) - aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + with gr.Row(): + aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") + aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) + aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), + label="Aesthetic imgs embedding", + value="None") - aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), label="Aesthetic imgs embedding", value=sorted(aesthetic_embeddings.keys())[0] if len(aesthetic_embeddings) > 0 else None) + with gr.Row(): + aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") + aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) + aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) - aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) with gr.Row(): restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) -- cgit v1.2.3 From 
9324cdaa3199d65c182858785dd1eca42b192b8e Mon Sep 17 00:00:00 2001 From: MalumaDev Date: Sun, 16 Oct 2022 17:53:56 +0200 Subject: ui fix, re organization of the code --- modules/aesthetic_clip.py | 154 +++++++++++++++++++++++++++++++++-- modules/img2img.py | 14 +++- modules/processing.py | 29 ++----- modules/sd_hijack.py | 102 ++--------------------- modules/sd_models.py | 5 +- modules/shared.py | 14 +++- modules/textual_inversion/dataset.py | 2 +- modules/txt2img.py | 18 ++-- modules/ui.py | 52 +++++++----- 9 files changed, 233 insertions(+), 157 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py index ccb35c73..34efa931 100644 --- a/modules/aesthetic_clip.py +++ b/modules/aesthetic_clip.py @@ -1,3 +1,4 @@ +import copy import itertools import os from pathlib import Path @@ -7,11 +8,12 @@ import gc import gradio as gr import torch from PIL import Image -from modules import shared -from modules.shared import device -from transformers import CLIPModel, CLIPProcessor +from torch import optim -from tqdm.auto import tqdm +from modules import shared +from transformers import CLIPModel, CLIPProcessor, CLIPTokenizer +from tqdm.auto import tqdm, trange +from modules.shared import opts, device def get_all_images_in_folder(folder): @@ -37,12 +39,39 @@ def iter_to_batched(iterable, n=1): yield chunk +def create_ui(): + with gr.Group(): + with gr.Accordion("Open for Clip Aesthetic!", open=False): + with gr.Row(): + aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", + value=0.9) + aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + + with gr.Row(): + aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', + placeholder="Aesthetic learning rate", value="0.0001") + aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) + aesthetic_imgs = gr.Dropdown(sorted(shared.aesthetic_embeddings.keys()), + label="Aesthetic imgs embedding", + value="None") + + with gr.Row(): + aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', + placeholder="This text is used to rotate the feature space of the imgs embs", + value="") + aesthetic_slerp_angle = gr.Slider(label='Slerp angle', minimum=0, maximum=1, step=0.01, + value=0.1) + aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + + return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative + + def generate_imgs_embd(name, folder, batch_size): # clipModel = CLIPModel.from_pretrained( # shared.sd_model.cond_stage_model.clipModel.name_or_path # ) - model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path).to(device) - processor = CLIPProcessor.from_pretrained(shared.sd_model.cond_stage_model.clipModel.name_or_path) + model = shared.clip_model.to(device) + processor = CLIPProcessor.from_pretrained(model.name_or_path) with torch.no_grad(): embs = [] @@ -63,7 +92,6 @@ def generate_imgs_embd(name, folder, batch_size): torch.save(embs, path) model = model.cpu() - del model del processor del embs gc.collect() @@ -74,4 +102,114 @@ def generate_imgs_embd(name, folder, batch_size): """ shared.update_aesthetic_embeddings() return gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), label="Imgs embedding", - value="None"), res, "" + value="None"), \ + gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), + label="Imgs embedding", + 
value="None"), res, "" + + +def slerp(low, high, val): + low_norm = low / torch.norm(low, dim=1, keepdim=True) + high_norm = high / torch.norm(high, dim=1, keepdim=True) + omega = torch.acos((low_norm * high_norm).sum(1)) + so = torch.sin(omega) + res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high + return res + + +class AestheticCLIP: + def __init__(self): + self.skip = False + self.aesthetic_steps = 0 + self.aesthetic_weight = 0 + self.aesthetic_lr = 0 + self.slerp = False + self.aesthetic_text_negative = "" + self.aesthetic_slerp_angle = 0 + self.aesthetic_imgs_text = "" + + self.image_embs_name = None + self.image_embs = None + self.load_image_embs(None) + + def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, + aesthetic_slerp=True, aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False): + self.aesthetic_imgs_text = aesthetic_imgs_text + self.aesthetic_slerp_angle = aesthetic_slerp_angle + self.aesthetic_text_negative = aesthetic_text_negative + self.slerp = aesthetic_slerp + self.aesthetic_lr = aesthetic_lr + self.aesthetic_weight = aesthetic_weight + self.aesthetic_steps = aesthetic_steps + self.load_image_embs(image_embs_name) + + def set_skip(self, skip): + self.skip = skip + + def load_image_embs(self, image_embs_name): + if image_embs_name is None or len(image_embs_name) == 0 or image_embs_name == "None": + image_embs_name = None + self.image_embs_name = None + if image_embs_name is not None and self.image_embs_name != image_embs_name: + self.image_embs_name = image_embs_name + self.image_embs = torch.load(shared.aesthetic_embeddings[self.image_embs_name], map_location=device) + self.image_embs /= self.image_embs.norm(dim=-1, keepdim=True) + self.image_embs.requires_grad_(False) + + def __call__(self, z, remade_batch_tokens): + if not self.skip and self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name is not None: + tokenizer = shared.sd_model.cond_stage_model.tokenizer + if not opts.use_old_emphasis_implementation: + remade_batch_tokens = [ + [tokenizer.bos_token_id] + x[:75] + [tokenizer.eos_token_id] for x in + remade_batch_tokens] + + tokens = torch.asarray(remade_batch_tokens).to(device) + + model = copy.deepcopy(shared.clip_model).to(device) + model.requires_grad_(True) + if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: + text_embs_2 = model.get_text_features( + **tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device)) + if self.aesthetic_text_negative: + text_embs_2 = self.image_embs - text_embs_2 + text_embs_2 /= text_embs_2.norm(dim=-1, keepdim=True) + img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle) + else: + img_embs = self.image_embs + + with torch.enable_grad(): + + # We optimize the model to maximize the similarity + optimizer = optim.Adam( + model.text_model.parameters(), lr=self.aesthetic_lr + ) + + for _ in trange(self.aesthetic_steps, desc="Aesthetic optimization"): + text_embs = model.get_text_features(input_ids=tokens) + text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True) + sim = text_embs @ img_embs.T + loss = -sim + optimizer.zero_grad() + loss.mean().backward() + optimizer.step() + + zn = model.text_model(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) + if opts.CLIP_stop_at_last_layers > 1: + zn = 
zn.hidden_states[-opts.CLIP_stop_at_last_layers] + zn = model.text_model.final_layer_norm(zn) + else: + zn = zn.last_hidden_state + model.cpu() + del model + gc.collect() + torch.cuda.empty_cache() + zn = torch.concat([zn[77 * i:77 * (i + 1)] for i in range(max(z.shape[1] // 77, 1))], 1) + if self.slerp: + z = slerp(z, zn, self.aesthetic_weight) + else: + z = z * (1 - self.aesthetic_weight) + zn * self.aesthetic_weight + + return z diff --git a/modules/img2img.py b/modules/img2img.py index 24126774..4ed80c4b 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -56,7 +56,14 @@ def process_batch(p, input_dir, output_dir, args): processed_image.save(os.path.join(output_dir, filename)) -def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, *args): +def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, + aesthetic_lr=0, + aesthetic_weight=0, aesthetic_steps=0, + aesthetic_imgs=None, + aesthetic_slerp=False, + aesthetic_imgs_text="", + aesthetic_slerp_angle=0.15, + aesthetic_text_negative=False, *args): is_inpaint = mode == 1 is_batch = mode == 2 @@ -109,6 +116,11 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro inpainting_mask_invert=inpainting_mask_invert, ) + shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), + aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, + aesthetic_slerp_angle, + aesthetic_text_negative) + if shared.cmd_opts.enable_console_prompts: print(f"\nimg2img: {prompt}", file=shared.progress_print_out) diff --git a/modules/processing.py b/modules/processing.py index 1db26c3e..685f9fcd 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -146,7 +146,8 @@ class Processed: self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0] self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0] self.seed = int(self.seed if type(self.seed) != list else self.seed[0]) if self.seed is not None else -1 - self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 + self.subseed = int( + self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1 self.all_prompts = all_prompts or [self.prompt] 
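(Aside on the slerp() helper added to modules/aesthetic_clip.py above: it is plain spherical linear interpolation, and AestheticCLIP.__call__ uses it to blend the optimized conditioning zn back into the original z when "Slerp interpolation" is ticked, falling back to a weighted linear mix otherwise. The snippet below is a standalone illustration with dummy tensors -- the shapes and the 0.9 weight are arbitrary and not taken from the patch.)

    import torch

    def slerp(low, high, val):
        # normalize both endpoints, measure the angle between them, then walk along the arc
        low_norm = low / torch.norm(low, dim=1, keepdim=True)
        high_norm = high / torch.norm(high, dim=1, keepdim=True)
        omega = torch.acos((low_norm * high_norm).sum(1))
        so = torch.sin(omega)
        return (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + \
               (torch.sin(val * omega) / so).unsqueeze(1) * high

    z = torch.randn(2, 768)     # stand-in for the original conditioning
    zn = torch.randn(2, 768)    # stand-in for the aesthetically optimized conditioning
    aesthetic_weight = 0.9

    blended = slerp(z, zn, aesthetic_weight)                    # "Slerp interpolation" checked
    mixed = z * (1 - aesthetic_weight) + zn * aesthetic_weight  # plain weighted mix otherwise
    print(blended.shape, mixed.shape)                           # torch.Size([2, 768]) twice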
self.all_seeds = all_seeds or [self.seed] @@ -332,16 +333,9 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip() -def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, - aesthetic_imgs=None, aesthetic_slerp=False, aesthetic_imgs_text="", - aesthetic_slerp_angle=0.15, - aesthetic_text_negative=False) -> Processed: +def process_images(p: StableDiffusionProcessing) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" - aesthetic_lr = float(aesthetic_lr) - aesthetic_weight = float(aesthetic_weight) - aesthetic_steps = int(aesthetic_steps) - if type(p.prompt) == list: assert (len(p.prompt) > 0) else: @@ -417,16 +411,10 @@ def process_images(p: StableDiffusionProcessing, aesthetic_lr=0, aesthetic_weigh # uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt]) # c = p.sd_model.get_learned_conditioning(prompts) with devices.autocast(): - if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): - shared.sd_model.cond_stage_model.set_aesthetic_params() + shared.aesthetic_clip.set_skip(True) uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps) - if hasattr(shared.sd_model.cond_stage_model, "set_aesthetic_params"): - shared.sd_model.cond_stage_model.set_aesthetic_params(aesthetic_lr, aesthetic_weight, - aesthetic_steps, aesthetic_imgs, - aesthetic_slerp, aesthetic_imgs_text, - aesthetic_slerp_angle, - aesthetic_text_negative) + shared.aesthetic_clip.set_skip(False) c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps) if len(model_hijack.comments) > 0: @@ -582,7 +570,6 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f - def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model) @@ -600,10 +587,12 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): seed_resize_from_w=self.seed_resize_from_w, p=self) samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning) - samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2] + samples = samples[:, :, self.truncate_y // 2:samples.shape[2] - self.truncate_y // 2, + self.truncate_x // 2:samples.shape[3] - self.truncate_x // 2] if opts.use_scale_latent_for_hires_fix: - samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), + mode="bilinear") else: decoded_samples = decode_first_stage(self.sd_model, samples) lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 5d0590af..227e7670 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -29,8 +29,8 @@ def apply_optimizations(): ldm.modules.diffusionmodules.model.nonlinearity = silu - - if cmd_opts.force_enable_xformers or 
(cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)): + if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and ( + 6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)): print("Applying xformers cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward @@ -118,33 +118,14 @@ class StableDiffusionModelHijack: return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count) -def slerp(low, high, val): - low_norm = low / torch.norm(low, dim=1, keepdim=True) - high_norm = high / torch.norm(high, dim=1, keepdim=True) - omega = torch.acos((low_norm * high_norm).sum(1)) - so = torch.sin(omega) - res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high - return res - - class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): def __init__(self, wrapped, hijack): super().__init__() self.wrapped = wrapped - self.clipModel = CLIPModel.from_pretrained( - self.wrapped.transformer.name_or_path - ) - del self.clipModel.vision_model - self.tokenizer = CLIPTokenizer.from_pretrained(self.wrapped.transformer.name_or_path) - self.hijack: StableDiffusionModelHijack = hijack - self.tokenizer = wrapped.tokenizer - # self.vision = CLIPVisionModel.from_pretrained(self.wrapped.transformer.name_or_path).eval() - self.image_embs_name = None - self.image_embs = None - self.load_image_embs(None) self.token_mults = {} - + self.hijack: StableDiffusionModelHijack = hijack + self.tokenizer = wrapped.tokenizer self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0] tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if @@ -164,28 +145,6 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if mult != 1.0: self.token_mults[ident] = mult - def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, - aesthetic_slerp=True, aesthetic_imgs_text="", - aesthetic_slerp_angle=0.15, - aesthetic_text_negative=False): - self.aesthetic_imgs_text = aesthetic_imgs_text - self.aesthetic_slerp_angle = aesthetic_slerp_angle - self.aesthetic_text_negative = aesthetic_text_negative - self.slerp = aesthetic_slerp - self.aesthetic_lr = aesthetic_lr - self.aesthetic_weight = aesthetic_weight - self.aesthetic_steps = aesthetic_steps - self.load_image_embs(image_embs_name) - - def load_image_embs(self, image_embs_name): - if image_embs_name is None or len(image_embs_name) == 0 or image_embs_name == "None": - image_embs_name = None - if image_embs_name is not None and self.image_embs_name != image_embs_name: - self.image_embs_name = image_embs_name - self.image_embs = torch.load(shared.aesthetic_embeddings[self.image_embs_name], map_location=device) - self.image_embs /= self.image_embs.norm(dim=-1, keepdim=True) - self.image_embs.requires_grad_(False) - def tokenize_line(self, line, used_custom_terms, hijack_comments): id_end = self.wrapped.tokenizer.eos_token_id @@ -391,58 +350,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): z1 = self.process_tokens(tokens, multipliers) z = z1 if z is None else torch.cat((z, z1), axis=-2) - - if self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and 
self.image_embs_name != None: - if not opts.use_old_emphasis_implementation: - remade_batch_tokens = [ - [self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in - remade_batch_tokens] - - tokens = torch.asarray(remade_batch_tokens).to(device) - - model = copy.deepcopy(self.clipModel).to(device) - model.requires_grad_(True) - if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0: - text_embs_2 = model.get_text_features( - **self.tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device)) - if self.aesthetic_text_negative: - text_embs_2 = self.image_embs - text_embs_2 - text_embs_2 /= text_embs_2.norm(dim=-1, keepdim=True) - img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle) - else: - img_embs = self.image_embs - - with torch.enable_grad(): - - # We optimize the model to maximize the similarity - optimizer = optim.Adam( - model.text_model.parameters(), lr=self.aesthetic_lr - ) - - for i in trange(self.aesthetic_steps, desc="Aesthetic optimization"): - text_embs = model.get_text_features(input_ids=tokens) - text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True) - sim = text_embs @ img_embs.T - loss = -sim - optimizer.zero_grad() - loss.mean().backward() - optimizer.step() - - zn = model.text_model(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers) - if opts.CLIP_stop_at_last_layers > 1: - zn = zn.hidden_states[-opts.CLIP_stop_at_last_layers] - zn = model.text_model.final_layer_norm(zn) - else: - zn = zn.last_hidden_state - model.cpu() - del model - - zn = torch.concat([zn for i in range(z.shape[1] // 77)], 1) - if self.slerp: - z = slerp(z, zn, self.aesthetic_weight) - else: - z = z * (1 - self.aesthetic_weight) + zn * self.aesthetic_weight - + z = shared.aesthetic_clip(z, remade_batch_tokens) remade_batch_tokens = rem_tokens batch_multipliers = rem_multipliers i += 1 diff --git a/modules/sd_models.py b/modules/sd_models.py index 3aa21ec1..8e4ee435 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -20,7 +20,7 @@ checkpoints_loaded = collections.OrderedDict() try: # this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start. 
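(Aside: the block deleted from sd_hijack.py in this commit is the aesthetic-gradient step that now lives in AestheticCLIP.__call__: a copy of the CLIP text model is optimized with Adam for a few steps so its text features move toward the stored image embedding, and the resulting hidden states are blended back into the ordinary conditioning. The sketch below mirrors only that update rule, with a small linear layer standing in for the text encoder -- it is an illustration, not the real model code.)

    import torch
    from torch import optim

    text_encoder = torch.nn.Linear(512, 768)    # stand-in for the copied CLIP text model
    tokens = torch.randn(1, 512)                # stand-in for the embedded prompt tokens
    img_embs = torch.randn(1, 768)              # stand-in for the stored aesthetic image embedding
    img_embs = img_embs / img_embs.norm(dim=-1, keepdim=True)

    optimizer = optim.Adam(text_encoder.parameters(), lr=1e-4)   # "Aesthetic learning rate"

    for _ in range(5):                                           # "Aesthetic steps"
        text_embs = text_encoder(tokens)
        text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True)
        sim = text_embs @ img_embs.T     # cosine similarity, both sides are unit length
        loss = -sim.mean()               # maximizing similarity = minimizing its negative
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()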
- from transformers import logging + from transformers import logging, CLIPModel logging.set_verbosity_error() except Exception: @@ -196,6 +196,9 @@ def load_model(): sd_hijack.model_hijack.hijack(sd_model) + if shared.clip_model is None or shared.clip_model.transformer.name_or_path != sd_model.cond_stage_model.wrapped.transformer.name_or_path: + shared.clip_model = CLIPModel.from_pretrained(sd_model.cond_stage_model.wrapped.transformer.name_or_path) + sd_model.eval() print(f"Model loaded.") diff --git a/modules/shared.py b/modules/shared.py index e2c98b2d..e19ca779 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -3,6 +3,7 @@ import datetime import json import os import sys +from collections import OrderedDict import gradio as gr import tqdm @@ -94,15 +95,15 @@ os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True) hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) loaded_hypernetwork = None -aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in - os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} -aesthetic_embeddings = aesthetic_embeddings | {"None": None} +aesthetic_embeddings = {} def update_aesthetic_embeddings(): global aesthetic_embeddings aesthetic_embeddings = {f.replace(".pt",""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")} - aesthetic_embeddings = aesthetic_embeddings | {"None": None} + aesthetic_embeddings = OrderedDict(**{"None": None}, **aesthetic_embeddings) + +update_aesthetic_embeddings() def reload_hypernetworks(): global hypernetworks @@ -381,6 +382,11 @@ sd_upscalers = [] sd_model = None +clip_model = None + +from modules.aesthetic_clip import AestheticCLIP +aesthetic_clip = AestheticCLIP() + progress_print_out = sys.stdout diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 68ceffe3..23bb4b6a 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -49,7 +49,7 @@ class PersonalizedBase(Dataset): print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): try: - image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.Resampling.BICUBIC) + image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC) except Exception: continue diff --git a/modules/txt2img.py b/modules/txt2img.py index 8f394d05..6cbc50fc 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -1,12 +1,17 @@ import modules.scripts -from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images +from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, \ + StableDiffusionProcessingImg2Img, process_images from modules.shared import opts, cmd_opts import modules.shared as shared import modules.processing as processing from modules.ui import plaintext_to_html -def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, firstphase_height: int,aesthetic_lr=0, +def txt2img(prompt: str, 
negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, + restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, + subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, + height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, + firstphase_height: int, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, aesthetic_imgs=None, aesthetic_slerp=False, @@ -41,15 +46,17 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: firstphase_height=firstphase_height if enable_hr else None, ) + shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), + aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, + aesthetic_text_negative) + if cmd_opts.enable_console_prompts: print(f"\ntxt2img: {prompt}", file=shared.progress_print_out) processed = modules.scripts.scripts_txt2img.run(p, *args) if processed is None: - processed = process_images(p, aesthetic_lr, aesthetic_weight, aesthetic_steps, aesthetic_imgs, aesthetic_slerp,aesthetic_imgs_text, - aesthetic_slerp_angle, - aesthetic_text_negative) + processed = process_images(p) shared.total_tqdm.clear() @@ -61,4 +68,3 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: processed.images = [] return processed.images, generation_info_js, plaintext_to_html(processed.info) - diff --git a/modules/ui.py b/modules/ui.py index 4069f0d2..0e5d73f0 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -43,7 +43,7 @@ from modules.images import save_image import modules.textual_inversion.ui import modules.hypernetworks.ui -import modules.aesthetic_clip +import modules.aesthetic_clip as aesthetic_clip import modules.images_history as img_his @@ -593,23 +593,25 @@ def create_ui(wrap_gradio_gpu_call): width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) - with gr.Group(): - with gr.Accordion("Open for Clip Aesthetic!",open=False): - with gr.Row(): - aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) - aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) - - with gr.Row(): - aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") - aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False) - aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), - label="Aesthetic imgs embedding", - value="None") - - with gr.Row(): - aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") - aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) - aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + # with gr.Group(): + # with gr.Accordion("Open for Clip Aesthetic!",open=False): + # with gr.Row(): + # aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight", value=0.9) + # aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5) + # + # with gr.Row(): + # aesthetic_lr = gr.Textbox(label='Aesthetic learning rate', placeholder="Aesthetic learning rate", value="0.0001") + # aesthetic_slerp = 
gr.Checkbox(label="Slerp interpolation", value=False) + # aesthetic_imgs = gr.Dropdown(sorted(aesthetic_embeddings.keys()), + # label="Aesthetic imgs embedding", + # value="None") + # + # with gr.Row(): + # aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", value="") + # aesthetic_slerp_angle = gr.Slider(label='Slerp angle',minimum=0, maximum=1, step=0.01, value=0.1) + # aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False) + + aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative = aesthetic_clip.create_ui() with gr.Row(): @@ -840,6 +842,9 @@ def create_ui(wrap_gradio_gpu_call): width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + aesthetic_weight_im, aesthetic_steps_im, aesthetic_lr_im, aesthetic_slerp_im, aesthetic_imgs_im, aesthetic_imgs_text_im, aesthetic_slerp_angle_im, aesthetic_text_negative_im = aesthetic_clip.create_ui() + + with gr.Row(): restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1) tiling = gr.Checkbox(label='Tiling', value=False) @@ -944,6 +949,14 @@ def create_ui(wrap_gradio_gpu_call): inpainting_mask_invert, img2img_batch_input_dir, img2img_batch_output_dir, + aesthetic_lr_im, + aesthetic_weight_im, + aesthetic_steps_im, + aesthetic_imgs_im, + aesthetic_slerp_im, + aesthetic_imgs_text_im, + aesthetic_slerp_angle_im, + aesthetic_text_negative_im, ] + custom_inputs, outputs=[ img2img_gallery, @@ -1283,7 +1296,7 @@ def create_ui(wrap_gradio_gpu_call): ) create_embedding_ae.click( - fn=modules.aesthetic_clip.generate_imgs_embd, + fn=aesthetic_clip.generate_imgs_embd, inputs=[ new_embedding_name_ae, process_src_ae, @@ -1291,6 +1304,7 @@ def create_ui(wrap_gradio_gpu_call): ], outputs=[ aesthetic_imgs, + aesthetic_imgs_im, ti_output, ti_outcome, ] -- cgit v1.2.3 From 019a3a88f07766f2d32c32fbe8e41625f28ecb5e Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 19 Oct 2022 17:15:47 +0100 Subject: Update ui.py --- modules/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index d2e24880..1573ef82 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1247,7 +1247,7 @@ def create_ui(wrap_gradio_gpu_call): run_preprocess = gr.Button(value="Preprocess", variant='primary') with gr.Tab(label="Train"): - gr.HTML(value="
Train an embedding; must specify a directory with a set of 1:1 ratio images") + gr.HTML(value="Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images
Initial learning rates: 0.005 for an Embedding, 0.00001 for Hypernetwork wiki
") with gr.Row(): train_embedding_name = gr.Dropdown(label='Embedding', elem_id="train_embedding", choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) create_refresh_button(train_embedding_name, sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings, lambda: {"choices": sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())}, "refresh_train_embedding_name") -- cgit v1.2.3 From eb7ba4b713ac2fb960ecf6365b1de0c89451e583 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 19 Oct 2022 19:50:46 +0100 Subject: update training header text --- modules/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 1573ef82..93c0767c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1247,7 +1247,7 @@ def create_ui(wrap_gradio_gpu_call): run_preprocess = gr.Button(value="Preprocess", variant='primary') with gr.Tab(label="Train"): - gr.HTML(value="
Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images
Initial learning rates: 0.005 for an Embedding, 0.00001 for Hypernetwork wiki") + gr.HTML(value="Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images
Initial learning rates: 0.005 for an Embedding, 0.00001 for Hypernetwork [wiki]
") with gr.Row(): train_embedding_name = gr.Dropdown(label='Embedding', elem_id="train_embedding", choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) create_refresh_button(train_embedding_name, sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings, lambda: {"choices": sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())}, "refresh_train_embedding_name") -- cgit v1.2.3 From 4d663055ded968831ec97f047dfa8e94036cf1c1 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 19 Oct 2022 20:33:18 +0100 Subject: update ui with extra training options --- modules/ui.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 93c0767c..cdb9d335 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1206,6 +1206,7 @@ def create_ui(wrap_gradio_gpu_call): new_embedding_name = gr.Textbox(label="Name") initialization_text = gr.Textbox(label="Initialization text", value="*") nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1) + overwrite_old_embedding = gr.Checkbox(value=False, label="Overwrite Old Embedding") with gr.Row(): with gr.Column(scale=3): @@ -1219,6 +1220,7 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'") new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization") + overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork") with gr.Row(): with gr.Column(scale=3): @@ -1247,14 +1249,17 @@ def create_ui(wrap_gradio_gpu_call): run_preprocess = gr.Button(value="Preprocess", variant='primary') with gr.Tab(label="Train"): - gr.HTML(value="
Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images
Initial learning rates: 0.005 for an Embedding, 0.00001 for Hypernetwork [wiki]") + gr.HTML(value="Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images [wiki]
") with gr.Row(): train_embedding_name = gr.Dropdown(label='Embedding', elem_id="train_embedding", choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) create_refresh_button(train_embedding_name, sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings, lambda: {"choices": sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())}, "refresh_train_embedding_name") with gr.Row(): train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', elem_id="train_hypernetwork", choices=[x for x in shared.hypernetworks.keys()]) create_refresh_button(train_hypernetwork_name, shared.reload_hypernetworks, lambda: {"choices": sorted([x for x in shared.hypernetworks.keys()])}, "refresh_train_hypernetwork_name") - learn_rate = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005") + with gr.Row(): + embedding_learn_rate = gr.Textbox(label='Embedding Learning rate', placeholder="Embedding Learning rate", value="0.005") + hypernetwork_learn_rate = gr.Textbox(label='Hypernetwork Learning rate', placeholder="Hypernetwork Learning rate", value="0.00001") + batch_size = gr.Number(label='Batch size', value=1, precision=0) dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") @@ -1288,6 +1293,7 @@ def create_ui(wrap_gradio_gpu_call): new_embedding_name, initialization_text, nvpt, + overwrite_old_embedding, ], outputs=[ train_embedding_name, @@ -1303,6 +1309,7 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_sizes, new_hypernetwork_layer_structure, new_hypernetwork_add_layer_norm, + overwrite_old_hypernetwork, ], outputs=[ train_hypernetwork_name, -- cgit v1.2.3 From 632e8d660293081cadb145d8062e5aff0a4a8f0d Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:19:40 +0100 Subject: split learn rates --- modules/ui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index cdb9d335..d07184ee 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1342,7 +1342,7 @@ def create_ui(wrap_gradio_gpu_call): _js="start_training_textual_inversion", inputs=[ train_embedding_name, - learn_rate, + embedding_learn_rate, batch_size, dataset_directory, log_directory, @@ -1367,7 +1367,7 @@ def create_ui(wrap_gradio_gpu_call): _js="start_training_textual_inversion", inputs=[ train_hypernetwork_name, - learn_rate, + hypernetwork_learn_rate, batch_size, dataset_directory, log_directory, -- cgit v1.2.3 From 4d6b9f76a55fd0ac0f72634071032dd9c6efb409 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:27:16 +0100 Subject: reorder create_hypernetwork params --- modules/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index d07184ee..322c082b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1307,9 +1307,9 @@ def create_ui(wrap_gradio_gpu_call): inputs=[ new_hypernetwork_name, new_hypernetwork_sizes, + overwrite_old_hypernetwork, new_hypernetwork_layer_structure, new_hypernetwork_add_layer_norm, - overwrite_old_hypernetwork, ], outputs=[ train_hypernetwork_name, -- cgit v1.2.3 From ab353b141df8eee042b0964bcb645015dabf3459 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 
00:48:07 +0100 Subject: link existing txt option --- modules/ui.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 322c082b..7f52ac0c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1234,6 +1234,7 @@ def create_ui(wrap_gradio_gpu_call): process_dst = gr.Textbox(label='Destination directory') process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) process_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) + preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', choices=['ignore', 'copy', 'prepend', 'append']) with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') @@ -1326,6 +1327,7 @@ def create_ui(wrap_gradio_gpu_call): process_dst, process_width, process_height, + preprocess_txt_action, process_flip, process_split, process_caption, -- cgit v1.2.3 From 55d8c6cce6d3aef848b9f194adad2ce53064d8b7 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:53:29 +0100 Subject: default to ignore existing captions --- modules/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 7f52ac0c..bd5f1b05 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1234,7 +1234,7 @@ def create_ui(wrap_gradio_gpu_call): process_dst = gr.Textbox(label='Destination directory') process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) process_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) - preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', choices=['ignore', 'copy', 'prepend', 'append']) + preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"]) with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') -- cgit v1.2.3 From 6f98e89486f55b0e4657e96ce640cf1c4675d187 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Thu, 20 Oct 2022 00:10:45 +0000 Subject: update --- modules/hypernetworks/hypernetwork.py | 29 +++++++++++++++-------- modules/hypernetworks/ui.py | 3 ++- modules/ui.py | 43 +++++++++++++++++++---------------- 3 files changed, 44 insertions(+), 31 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 74300122..7d617680 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -22,16 +22,20 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): super().__init__() - assert layer_structure is not None, "layer_structure mut not be None" + assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" 
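# Illustration (not part of the patch): with dim=768 and layer_structure=[1, 2, 1],
# the loop below builds Linear(768, 1536) followed by Linear(1536, 768); the new
# activation_func option appends a torch.nn.ReLU() or torch.nn.LeakyReLU() after each
# Linear, and add_layer_norm appends a LayerNorm sized to that layer's output, giving
# roughly: Linear(768, 1536), ReLU(), LayerNorm(1536), Linear(1536, 768), ReLU(), LayerNorm(768)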
linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if activation_func == "relu": + linears.append(torch.nn.ReLU()) + if activation_func == "leakyrelu": + linears.append(torch.nn.LeakyReLU()) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -42,8 +46,9 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - layer.weight.data.normal_(mean=0.0, std=0.01) - layer.bias.data.zero_() + if not "ReLU" in layer.__str__(): + layer.weight.data.normal_(mean=0.0, std=0.01) + layer.bias.data.zero_() self.to(devices.device) @@ -69,7 +74,8 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - layer_structure += [layer.weight, layer.bias] + if not "ReLU" in layer.__str__(): + layer_structure += [layer.weight, layer.bias] return layer_structure @@ -81,7 +87,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False, activation_func=None): self.filename = None self.name = name self.layers = {} @@ -90,11 +96,12 @@ class Hypernetwork: self.sd_checkpoint_name = None self.layer_structure = layer_structure self.add_layer_norm = add_layer_norm + self.activation_func = activation_func for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), ) def weights(self): @@ -117,6 +124,7 @@ class Hypernetwork: state_dict['name'] = self.name state_dict['layer_structure'] = self.layer_structure state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['activation_func'] = self.activation_func state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -131,12 +139,13 @@ class Hypernetwork: self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) self.add_layer_norm = state_dict.get('is_layer_norm', False) + self.activation_func = state_dict.get('activation_func', None) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm), - HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm, self.activation_func), ) self.name = state_dict.get('name', self.name) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 08f75f15..83f9547b 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -10,7 +10,7 @@ from modules import sd_hijack, shared, devices from modules.hypernetworks import hypernetwork -def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False): +def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False, activation_func=None): fn = os.path.join(shared.cmd_opts.hypernetwork_dir, 
f"{name}.pt") assert not os.path.exists(fn), f"file {fn} already exists" @@ -22,6 +22,7 @@ def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm enable_sizes=[int(x) for x in enable_sizes], layer_structure=layer_structure, add_layer_norm=add_layer_norm, + activation_func=activation_func, ) hypernet.save(fn) diff --git a/modules/ui.py b/modules/ui.py index d2e24880..8751fa9c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -5,43 +5,44 @@ import json import math import mimetypes import os +import platform import random +import subprocess as sp import sys import tempfile import time import traceback -import platform -import subprocess as sp from functools import partial, reduce +import gradio as gr +import gradio.routes +import gradio.utils import numpy as np +import piexif import torch from PIL import Image, PngImagePlugin -import piexif -import gradio as gr -import gradio.utils -import gradio.routes - -from modules import sd_hijack, sd_models, localization +from modules import localization, sd_hijack, sd_models from modules.paths import script_path -from modules.shared import opts, cmd_opts, restricted_opts +from modules.shared import cmd_opts, opts, restricted_opts + if cmd_opts.deepdanbooru: from modules.deepbooru import get_deepbooru_tags -import modules.shared as shared -from modules.sd_samplers import samplers, samplers_for_img2img -from modules.sd_hijack import model_hijack + +import modules.codeformer_model +import modules.generation_parameters_copypaste +import modules.gfpgan_model +import modules.hypernetworks.ui +import modules.images_history as img_his import modules.ldsr_model import modules.scripts -import modules.gfpgan_model -import modules.codeformer_model +import modules.shared as shared import modules.styles -import modules.generation_parameters_copypaste +import modules.textual_inversion.ui from modules import prompt_parser from modules.images import save_image -import modules.textual_inversion.ui -import modules.hypernetworks.ui -import modules.images_history as img_his +from modules.sd_hijack import model_hijack +from modules.sd_samplers import samplers, samplers_for_img2img # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() @@ -268,8 +269,8 @@ def calc_time_left(progress, threshold, label, force_display): time_since_start = time.time() - shared.state.time_start eta = (time_since_start/progress) eta_relative = eta-time_since_start - if (eta_relative > threshold and progress > 0.02) or force_display: - return label + time.strftime('%H:%M:%S', time.gmtime(eta_relative)) + if (eta_relative > threshold and progress > 0.02) or force_display: + return label + time.strftime('%H:%M:%S', time.gmtime(eta_relative)) else: return "" @@ -1219,6 +1220,7 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. 
ex:'1, 2, 1'") new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization") + new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=["relu", "leakyrelu"]) with gr.Row(): with gr.Column(scale=3): @@ -1303,6 +1305,7 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_sizes, new_hypernetwork_layer_structure, new_hypernetwork_add_layer_norm, + new_hypernetwork_activation_func, ], outputs=[ train_hypernetwork_name, -- cgit v1.2.3 From ba469343e6a1c6e23e82acf5feb65c6101dacbb2 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Thu, 20 Oct 2022 00:17:04 +0000 Subject: align ui.py imports with upstream --- modules/ui.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 987b1d7d..913b23b4 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -5,44 +5,43 @@ import json import math import mimetypes import os -import platform import random -import subprocess as sp import sys import tempfile import time import traceback +import platform +import subprocess as sp from functools import partial, reduce -import gradio as gr -import gradio.routes -import gradio.utils import numpy as np -import piexif import torch from PIL import Image, PngImagePlugin +import piexif -from modules import localization, sd_hijack, sd_models -from modules.paths import script_path -from modules.shared import cmd_opts, opts, restricted_opts +import gradio as gr +import gradio.utils +import gradio.routes +from modules import sd_hijack, sd_models, localization +from modules.paths import script_path +from modules.shared import opts, cmd_opts, restricted_opts if cmd_opts.deepdanbooru: from modules.deepbooru import get_deepbooru_tags - -import modules.codeformer_model -import modules.generation_parameters_copypaste -import modules.gfpgan_model -import modules.hypernetworks.ui -import modules.images_history as img_his +import modules.shared as shared +from modules.sd_samplers import samplers, samplers_for_img2img +from modules.sd_hijack import model_hijack import modules.ldsr_model import modules.scripts -import modules.shared as shared +import modules.gfpgan_model +import modules.codeformer_model import modules.styles -import modules.textual_inversion.ui +import modules.generation_parameters_copypaste from modules import prompt_parser from modules.images import save_image -from modules.sd_hijack import model_hijack -from modules.sd_samplers import samplers, samplers_for_img2img +import modules.textual_inversion.ui +import modules.hypernetworks.ui +import modules.images_history as img_his # this is a fix for Windows users. 
Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() -- cgit v1.2.3 From f8733ad08be08bafb40f4299785590e11f049e96 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Thu, 20 Oct 2022 11:07:37 +0000 Subject: add linear as a act func (option for doin nothing) --- modules/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 913b23b4..716f14b8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1224,7 +1224,7 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'") new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization") - new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=["relu", "leakyrelu"]) + new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=["linear", "relu", "leakyrelu"]) with gr.Row(): with gr.Column(scale=3): -- cgit v1.2.3 From 85dd62c4c7635b8e21a75f140d093036069e97a1 Mon Sep 17 00:00:00 2001 From: Milly Date: Thu, 20 Oct 2022 22:56:45 +0900 Subject: train: ui: added `Split image threshold` and `Split image overlap ratio` to preprocess --- modules/textual_inversion/preprocess.py | 10 +++++----- modules/ui.py | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 2743bdeb..c8df8aa0 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,7 +12,7 @@ if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru -def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): +def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2): try: if process_caption: shared.interrogator.load() @@ -22,7 +22,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ db_opts[deepbooru.OPT_INCLUDE_RANKS] = False deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) - preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) + preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio) finally: @@ -34,13 +34,13 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ -def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): +def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2): width = process_width height = process_height src = os.path.abspath(process_src) dst = 
os.path.abspath(process_dst) - split_threshold = 0.5 - overlap_ratio = 0.2 + split_threshold = max(0.0, min(1.0, split_threshold)) + overlap_ratio = max(0.0, min(0.9, overlap_ratio)) assert src != dst, 'same directory specified as source and destination' diff --git a/modules/ui.py b/modules/ui.py index a2dbd41e..bc7f3330 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1240,10 +1240,14 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') - process_split = gr.Checkbox(label='Split oversized images into two') + process_split = gr.Checkbox(label='Split oversized images') process_caption = gr.Checkbox(label='Use BLIP for caption') process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) + with gr.Row(visible=False) as process_split_extra_row: + process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05) + process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05) + with gr.Row(): with gr.Column(scale=3): gr.HTML(value="") @@ -1251,6 +1255,12 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): run_preprocess = gr.Button(value="Preprocess", variant='primary') + process_split.change( + fn=lambda show: gr_show(show), + inputs=[process_split], + outputs=[process_split_extra_row], + ) + with gr.Tab(label="Train"): gr.HTML(value="
Train an embedding; must specify a directory with a set of 1:1 ratio images
") with gr.Row(): @@ -1327,7 +1337,9 @@ def create_ui(wrap_gradio_gpu_call): process_flip, process_split, process_caption, - process_caption_deepbooru + process_caption_deepbooru, + process_split_threshold, + process_overlap_ratio, ], outputs=[ ti_output, -- cgit v1.2.3 From df5706409386cc2e88718bd9101045587c39f8bb Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 16:10:51 +0300 Subject: do not load aesthetic clip model until it's needed add refresh button for aesthetic embeddings add aesthetic params to images' infotext --- modules/aesthetic_clip.py | 40 +++++++++++++++++++---- modules/generation_parameters_copypaste.py | 18 +++++++++-- modules/img2img.py | 5 +-- modules/processing.py | 4 +-- modules/sd_models.py | 3 -- modules/txt2img.py | 4 +-- modules/ui.py | 52 ++++++++++++++++++++---------- style.css | 2 +- 8 files changed, 89 insertions(+), 39 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py index 34efa931..8c828541 100644 --- a/modules/aesthetic_clip.py +++ b/modules/aesthetic_clip.py @@ -40,6 +40,8 @@ def iter_to_batched(iterable, n=1): def create_ui(): + import modules.ui + with gr.Group(): with gr.Accordion("Open for Clip Aesthetic!", open=False): with gr.Row(): @@ -55,6 +57,8 @@ def create_ui(): label="Aesthetic imgs embedding", value="None") + modules.ui.create_refresh_button(aesthetic_imgs, shared.update_aesthetic_embeddings, lambda: {"choices": sorted(shared.aesthetic_embeddings.keys())}, "refresh_aesthetic_embeddings") + with gr.Row(): aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs', placeholder="This text is used to rotate the feature space of the imgs embs", @@ -66,11 +70,21 @@ def create_ui(): return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative +aesthetic_clip_model = None + + +def aesthetic_clip(): + global aesthetic_clip_model + + if aesthetic_clip_model is None or aesthetic_clip_model.name_or_path != shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path: + aesthetic_clip_model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path) + aesthetic_clip_model.cpu() + + return aesthetic_clip_model + + def generate_imgs_embd(name, folder, batch_size): - # clipModel = CLIPModel.from_pretrained( - # shared.sd_model.cond_stage_model.clipModel.name_or_path - # ) - model = shared.clip_model.to(device) + model = aesthetic_clip().to(device) processor = CLIPProcessor.from_pretrained(model.name_or_path) with torch.no_grad(): @@ -91,7 +105,7 @@ def generate_imgs_embd(name, folder, batch_size): path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt") torch.save(embs, path) - model = model.cpu() + model.cpu() del processor del embs gc.collect() @@ -132,7 +146,7 @@ class AestheticCLIP: self.image_embs = None self.load_image_embs(None) - def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, + def set_aesthetic_params(self, p, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None, aesthetic_slerp=True, aesthetic_imgs_text="", aesthetic_slerp_angle=0.15, aesthetic_text_negative=False): @@ -145,6 +159,18 @@ class AestheticCLIP: self.aesthetic_steps = aesthetic_steps self.load_image_embs(image_embs_name) + if self.image_embs_name is not None: + p.extra_generation_params.update({ + "Aesthetic LR": aesthetic_lr, + "Aesthetic weight": 

From df5706409386cc2e88718bd9101045587c39f8bb Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Fri, 21 Oct 2022 16:10:51 +0300
Subject: do not load aesthetic clip model until it's needed
 add refresh button for aesthetic embeddings
 add aesthetic params to images' infotext

---
 modules/aesthetic_clip.py                  | 40 +++++++++++++++++++----
 modules/generation_parameters_copypaste.py | 18 +++++++++--
 modules/img2img.py                         |  5 +--
 modules/processing.py                      |  4 +--
 modules/sd_models.py                       |  3 --
 modules/txt2img.py                         |  4 +--
 modules/ui.py                              | 52 ++++++++++++++++++++----------
 style.css                                  |  2 +-
 8 files changed, 89 insertions(+), 39 deletions(-)

(limited to 'modules/ui.py')

diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py
index 34efa931..8c828541 100644
--- a/modules/aesthetic_clip.py
+++ b/modules/aesthetic_clip.py
@@ -40,6 +40,8 @@ def iter_to_batched(iterable, n=1):
 
 
 def create_ui():
+    import modules.ui
+
     with gr.Group():
         with gr.Accordion("Open for Clip Aesthetic!", open=False):
             with gr.Row():
@@ -55,6 +57,8 @@ def create_ui():
                                                 label="Aesthetic imgs embedding",
                                                 value="None")
 
+                modules.ui.create_refresh_button(aesthetic_imgs, shared.update_aesthetic_embeddings, lambda: {"choices": sorted(shared.aesthetic_embeddings.keys())}, "refresh_aesthetic_embeddings")
+
             with gr.Row():
                 aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs',
                                                  placeholder="This text is used to rotate the feature space of the imgs embs",
@@ -66,11 +70,21 @@ def create_ui():
     return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative
 
 
+aesthetic_clip_model = None
+
+
+def aesthetic_clip():
+    global aesthetic_clip_model
+
+    if aesthetic_clip_model is None or aesthetic_clip_model.name_or_path != shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path:
+        aesthetic_clip_model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path)
+        aesthetic_clip_model.cpu()
+
+    return aesthetic_clip_model
+
+
 def generate_imgs_embd(name, folder, batch_size):
-    # clipModel = CLIPModel.from_pretrained(
-    #     shared.sd_model.cond_stage_model.clipModel.name_or_path
-    # )
-    model = shared.clip_model.to(device)
+    model = aesthetic_clip().to(device)
     processor = CLIPProcessor.from_pretrained(model.name_or_path)
 
     with torch.no_grad():
@@ -91,7 +105,7 @@ def generate_imgs_embd(name, folder, batch_size):
     path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt")
     torch.save(embs, path)
 
-    model = model.cpu()
+    model.cpu()
     del processor
     del embs
     gc.collect()
@@ -132,7 +146,7 @@ class AestheticCLIP:
         self.image_embs = None
         self.load_image_embs(None)
 
-    def set_aesthetic_params(self, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None,
+    def set_aesthetic_params(self, p, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None,
                              aesthetic_slerp=True, aesthetic_imgs_text="",
                              aesthetic_slerp_angle=0.15,
                              aesthetic_text_negative=False):
@@ -145,6 +159,18 @@ class AestheticCLIP:
         self.aesthetic_steps = aesthetic_steps
         self.load_image_embs(image_embs_name)
 
+        if self.image_embs_name is not None:
+            p.extra_generation_params.update({
+                "Aesthetic LR": aesthetic_lr,
+                "Aesthetic weight": aesthetic_weight,
+                "Aesthetic steps": aesthetic_steps,
+                "Aesthetic embedding": self.image_embs_name,
+                "Aesthetic slerp": aesthetic_slerp,
+                "Aesthetic text": aesthetic_imgs_text,
+                "Aesthetic text negative": aesthetic_text_negative,
+                "Aesthetic slerp angle": aesthetic_slerp_angle,
+            })
+
     def set_skip(self, skip):
         self.skip = skip
 
@@ -168,7 +194,7 @@ class AestheticCLIP:
 
             tokens = torch.asarray(remade_batch_tokens).to(device)
 
-            model = copy.deepcopy(shared.clip_model).to(device)
+            model = copy.deepcopy(aesthetic_clip()).to(device)
             model.requires_grad_(True)
             if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0:
                 text_embs_2 = model.get_text_features(
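The new aesthetic_clip() helper is the commit's lazy-loading piece: instead of a CLIP model being created eagerly at startup (the old shared.clip_model), the weights are fetched on first use, parked on the CPU between uses, and reloaded only when the checkpoint's text encoder changes. The same memoized-singleton pattern in isolation might look like the sketch below; the helper name and cache key are assumptions for illustration.

from transformers import CLIPModel

_cached_model = None
_cached_name = None

def get_clip_model(name_or_path):
    """Load the CLIP weights once and reuse them until a different checkpoint is requested."""
    global _cached_model, _cached_name

    if _cached_model is None or _cached_name != name_or_path:
        _cached_model = CLIPModel.from_pretrained(name_or_path)
        _cached_model.cpu()   # keep it parked on the CPU; callers move it to the GPU only while needed
        _cached_name = name_or_path

    return _cached_model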
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 0f041449..f73647da 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -4,13 +4,22 @@ import gradio as gr
 from modules.shared import script_path
 from modules import shared
 
-re_param_code = r"\s*([\w ]+):\s*([^,]+)(?:,|$)"
+re_param_code = r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)'
 re_param = re.compile(re_param_code)
 re_params = re.compile(r"^(?:" + re_param_code + "){3,}$")
 re_imagesize = re.compile(r"^(\d+)x(\d+)$")
 type_of_gr_update = type(gr.update())
 
+def quote(text):
+    if ',' not in str(text):
+        return text
+
+    text = str(text)
+    text = text.replace('\\', '\\\\')
+    text = text.replace('"', '\\"')
+    return f'"{text}"'
+
 
 def parse_generation_parameters(x: str):
     """parses generation parameters string, the one you see in text field under the picture in UI:
 ```
@@ -83,7 +92,12 @@ def connect_paste(button, paste_fields, input_comp, js=None):
             else:
                 try:
                     valtype = type(output.value)
-                    val = valtype(v)
+
+                    if valtype == bool and v == "False":
+                        val = False
+                    else:
+                        val = valtype(v)
+
                     res.append(gr.update(value=val))
                 except Exception:
                     res.append(gr.update())
diff --git a/modules/img2img.py b/modules/img2img.py
index bc7c66bc..eea5199b 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -109,10 +109,7 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
         inpainting_mask_invert=inpainting_mask_invert,
     )
 
-    shared.aesthetic_clip.set_aesthetic_params(float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps),
-                                               aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text,
-                                               aesthetic_slerp_angle,
-                                               aesthetic_text_negative)
+    shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative)
 
     if shared.cmd_opts.enable_console_prompts:
         print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
diff --git a/modules/processing.py b/modules/processing.py
index d1deffa9..f0852cd5 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -12,7 +12,7 @@ from skimage import exposure
 from typing import Any, Dict, List, Optional
 
 import modules.sd_hijack
-from modules import devices, prompt_parser, masking, sd_samplers, lowvram
+from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste
 from modules.sd_hijack import model_hijack
 from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
@@ -318,7 +318,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
 
     generation_params.update(p.extra_generation_params)
 
-    generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None])
+    generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
 
     negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else ""
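Because values such as the aesthetic text can contain commas, create_infotext now runs every value through quote() and the widened re_param regex reads quoted values back when parameters are pasted. A small standalone round-trip check, using the regex and quoting rules introduced above:

import re

re_param = re.compile(r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)')

def quote(text):
    # Same rule as the patch: values without commas pass through unchanged.
    if ',' not in str(text):
        return text
    text = str(text).replace('\\', '\\\\').replace('"', '\\"')
    return f'"{text}"'

line = f"Steps: 20, Aesthetic text: {quote('cats, oil painting')}, Aesthetic weight: 0.9"
print(re_param.findall(line))
# -> [('Steps', '20'), ('Aesthetic text', '"cats, oil painting"'), ('Aesthetic weight', '0.9')]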
"Aesthetic weight"), + (aesthetic_steps, "Aesthetic steps"), + (aesthetic_imgs, "Aesthetic embedding"), + (aesthetic_slerp, "Aesthetic slerp"), + (aesthetic_imgs_text, "Aesthetic text"), + (aesthetic_text_negative, "Aesthetic text negative"), + (aesthetic_slerp_angle, "Aesthetic slerp angle"), ] txt2img_preview_params = [ @@ -1077,6 +1087,14 @@ def create_ui(wrap_gradio_gpu_call): (seed_resize_from_w, "Seed resize from-1"), (seed_resize_from_h, "Seed resize from-2"), (denoising_strength, "Denoising strength"), + (aesthetic_lr_im, "Aesthetic LR"), + (aesthetic_weight_im, "Aesthetic weight"), + (aesthetic_steps_im, "Aesthetic steps"), + (aesthetic_imgs_im, "Aesthetic embedding"), + (aesthetic_slerp_im, "Aesthetic slerp"), + (aesthetic_imgs_text_im, "Aesthetic text"), + (aesthetic_text_negative_im, "Aesthetic text negative"), + (aesthetic_slerp_angle_im, "Aesthetic slerp angle"), ] token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter]) diff --git a/style.css b/style.css index 26ae36a5..5d2bacc9 100644 --- a/style.css +++ b/style.css @@ -477,7 +477,7 @@ input[type="range"]{ padding: 0; } -#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization{ +#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization, #refresh_aesthetic_embeddings{ max-width: 2.5em; min-width: 2.5em; height: 2.4em; -- cgit v1.2.3 From 704036ff07b71bf86cadcbbff2bcfeebdd1ed3a6 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 17:11:42 +0300 Subject: make aspect ratio overlay work regardless of selected localization --- javascript/aspectRatioOverlay.js | 36 +++++++++++++++++------------------- javascript/dragdrop.js | 2 +- modules/ui.py | 4 ++-- 3 files changed, 20 insertions(+), 22 deletions(-) (limited to 'modules/ui.py') diff --git a/javascript/aspectRatioOverlay.js b/javascript/aspectRatioOverlay.js index 96f1c00d..d3ca2781 100644 --- a/javascript/aspectRatioOverlay.js +++ b/javascript/aspectRatioOverlay.js @@ -3,12 +3,12 @@ let currentWidth = null; let currentHeight = null; let arFrameTimeout = setTimeout(function(){},0); -function dimensionChange(e,dimname){ +function dimensionChange(e, is_width, is_height){ - if(dimname == 'Width'){ + if(is_width){ currentWidth = e.target.value*1.0 } - if(dimname == 'Height'){ + if(is_height){ currentHeight = e.target.value*1.0 } @@ -98,22 +98,20 @@ onUiUpdate(function(){ var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200")) if(inImg2img){ let inputs = gradioApp().querySelectorAll('input'); - inputs.forEach(function(e){ - let parentLabel = e.parentElement.querySelector('label') - if(parentLabel && parentLabel.innerText){ - if(!e.classList.contains('scrollwatch')){ - if(parentLabel.innerText == 'Width' || parentLabel.innerText == 'Height'){ - e.addEventListener('input', function(e){dimensionChange(e,parentLabel.innerText)} ) - e.classList.add('scrollwatch') - } - if(parentLabel.innerText == 'Width'){ - currentWidth = e.value*1.0 - } - if(parentLabel.innerText == 'Height'){ - currentHeight = e.value*1.0 - } - } - } + inputs.forEach(function(e){ + var is_width = e.parentElement.id == "img2img_width" + var is_height = e.parentElement.id == "img2img_height" + + if((is_width || is_height) && !e.classList.contains('scrollwatch')){ + e.addEventListener('input', function(e){dimensionChange(e, is_width, 

From 704036ff07b71bf86cadcbbff2bcfeebdd1ed3a6 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Fri, 21 Oct 2022 17:11:42 +0300
Subject: make aspect ratio overlay work regardless of selected localization

---
 javascript/aspectRatioOverlay.js | 36 +++++++++++++++++-------------------
 javascript/dragdrop.js           |  2 +-
 modules/ui.py                    |  4 ++--
 3 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'modules/ui.py')

diff --git a/javascript/aspectRatioOverlay.js b/javascript/aspectRatioOverlay.js
index 96f1c00d..d3ca2781 100644
--- a/javascript/aspectRatioOverlay.js
+++ b/javascript/aspectRatioOverlay.js
@@ -3,12 +3,12 @@ let currentWidth = null;
 let currentHeight = null;
 let arFrameTimeout = setTimeout(function(){},0);
 
-function dimensionChange(e,dimname){
+function dimensionChange(e, is_width, is_height){
 
-    if(dimname == 'Width'){
+    if(is_width){
         currentWidth = e.target.value*1.0
     }
-    if(dimname == 'Height'){
+    if(is_height){
         currentHeight = e.target.value*1.0
     }
 
@@ -98,22 +98,20 @@ onUiUpdate(function(){
     var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200"))
     if(inImg2img){
         let inputs = gradioApp().querySelectorAll('input');
-        inputs.forEach(function(e){
-            let parentLabel = e.parentElement.querySelector('label')
-            if(parentLabel && parentLabel.innerText){
-                if(!e.classList.contains('scrollwatch')){
-                    if(parentLabel.innerText == 'Width' || parentLabel.innerText == 'Height'){
-                        e.addEventListener('input', function(e){dimensionChange(e,parentLabel.innerText)} )
-                        e.classList.add('scrollwatch')
-                    }
-                    if(parentLabel.innerText == 'Width'){
-                        currentWidth = e.value*1.0
-                    }
-                    if(parentLabel.innerText == 'Height'){
-                        currentHeight = e.value*1.0
-                    }
-                }
-            }
+        inputs.forEach(function(e){
+            var is_width = e.parentElement.id == "img2img_width"
+            var is_height = e.parentElement.id == "img2img_height"
+
+            if((is_width || is_height) && !e.classList.contains('scrollwatch')){
+                e.addEventListener('input', function(e){dimensionChange(e, is_width, is_height)} )
+                e.classList.add('scrollwatch')
+            }
+            if(is_width){
+                currentWidth = e.value*1.0
+            }
+            if(is_height){
+                currentHeight = e.value*1.0
+            }
         })
     }
 });
diff --git a/javascript/dragdrop.js b/javascript/dragdrop.js
index 070cf255..3ed1cb3c 100644
--- a/javascript/dragdrop.js
+++ b/javascript/dragdrop.js
@@ -43,7 +43,7 @@ function dropReplaceImage( imgWrap, files ) {
 window.document.addEventListener('dragover', e => {
     const target = e.composedPath()[0];
     const imgWrap = target.closest('[data-testid="image"]');
-    if ( !imgWrap && target.placeholder.indexOf("Prompt") == -1) {
+    if ( !imgWrap && target.placeholder && target.placeholder.indexOf("Prompt") == -1) {
         return;
     }
     e.stopPropagation();
diff --git a/modules/ui.py b/modules/ui.py
index 0d020de6..85f95792 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -879,8 +879,8 @@ def create_ui(wrap_gradio_gpu_call):
                     sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
 
                     with gr.Group():
-                        width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
-                        height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+                        width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512, elem_id="img2img_width")
+                        height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512, elem_id="img2img_height")
 
                     with gr.Row():
                         restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
-- 
cgit v1.2.3
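The ui.py half of this commit is what makes the JavaScript localization-proof: the img2img sliders get stable elem_id values, so the overlay script can match element ids instead of translated 'Width'/'Height' labels. A minimal sketch of the pattern, assuming only that Gradio exposes elem_id as an id in the rendered page that client-side script can query:

import gradio as gr

with gr.Blocks() as demo:
    # elem_id ends up as an id attribute in the rendered page, so client-side code
    # can locate the slider with something like querySelector("#img2img_width input"),
    # regardless of what the visible label says in the active localization.
    width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512, elem_id="img2img_width")
    height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512, elem_id="img2img_height")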