12 files changed, 103 insertions, 32 deletions
diff --git a/.gitignore b/.gitignore
index 5381c515..78cf719e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ __pycache__
 /outputs
 /config.json
 /log
-webui.settings.bat
-\ No newline at end of file
+/webui.settings.bat
+/embeddings
diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py
index 2ed1d273..e86ad775 100644
--- a/modules/esrgan_model.py
+++ b/modules/esrgan_model.py
@@ -14,17 +14,20 @@ import modules.images
 
 def load_model(filename):
     # this code is adapted from https://github.com/xinntao/ESRGAN
-    if torch.has_mps:
-        map_l = 'cpu'
-    else:
-        map_l = None
-    pretrained_net = torch.load(filename, map_location=map_l)
+    pretrained_net = torch.load(filename, map_location='cpu' if torch.has_mps else None)
     crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32)
 
     if 'conv_first.weight' in pretrained_net:
         crt_model.load_state_dict(pretrained_net)
         return crt_model
 
+    if 'model.0.weight' not in pretrained_net:
+        is_realesrgan = "params_ema" in pretrained_net and 'body.0.rdb1.conv1.weight' in pretrained_net["params_ema"]
+        if is_realesrgan:
+            raise Exception("The file is a RealESRGAN model, it can't be used as a ESRGAN model.")
+        else:
+            raise Exception("The file is not a ESRGAN model.")
+
     crt_net = crt_model.state_dict()
     load_net_clean = {}
     for k, v in pretrained_net.items():
diff --git a/modules/img2img.py b/modules/img2img.py
index c2392305..0c91ef3f 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -11,7 +11,7 @@ from modules.ui import plaintext_to_html
 import modules.images as images
 import modules.scripts
 
-def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, mode: int, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int, upscaler_index: str, upscale_overlap: int, inpaint_full_res: bool, inpainting_mask_invert: int, *args):
+def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, mode: int, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, denoising_strength_change_factor: float, seed: int, height: int, width: int, resize_mode: int, upscaler_index: str, upscale_overlap: int, inpaint_full_res: bool, inpainting_mask_invert: int, *args):
     is_inpaint = mode == 1
     is_loopback = mode == 2
     is_upscale = mode == 3
@@ -50,8 +50,12 @@ def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index
         denoising_strength=denoising_strength,
         inpaint_full_res=inpaint_full_res,
         inpainting_mask_invert=inpainting_mask_invert,
-        extra_generation_params={"Denoising Strength": denoising_strength}
+        extra_generation_params={
+            "Denoising strength": denoising_strength,
+            "Denoising strength change factor": denoising_strength_change_factor
+        }
     )
+    print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
 
     if is_loopback:
         output_images, info = None, None
@@ -99,7 +103,7 @@ def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index
 
             p.init_images = [init_img]
             p.seed = processed.seed + 1
-            p.denoising_strength = max(p.denoising_strength * 0.95, 0.1)
+            p.denoising_strength = min(max(p.denoising_strength * denoising_strength_change_factor, 0.1), 1)
             history.append(processed.images[0])
 
         grid = images.image_grid(history, batch_size, rows=1)
@@ -168,5 +172,6 @@ def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index
         if processed is None:
             processed = process_images(p)
 
+    shared.total_tqdm.clear()
 
     return processed.images, processed.js(), plaintext_to_html(processed.info)
diff --git a/modules/realesrgan_model.py b/modules/realesrgan_model.py
index e480887f..e2cef0c8 100644
--- a/modules/realesrgan_model.py
+++ b/modules/realesrgan_model.py
@@ -5,7 +5,7 @@ import numpy as np
 from PIL import Image
 
 import modules.images
-from modules.shared import cmd_opts
+from modules.shared import cmd_opts, opts
 
 RealesrganModelInfo = namedtuple("RealesrganModelInfo", ["name", "location", "model", "netscale"])
 
@@ -76,7 +76,9 @@ def upscale_with_realesrgan(image, RealESRGAN_upscaling, RealESRGAN_model_index)
         scale=info.netscale,
         model_path=info.location,
         model=model,
-        half=not cmd_opts.no_half
+        half=not cmd_opts.no_half,
+        tile=opts.ESRGAN_tile,
+        tile_pad=opts.ESRGAN_tile_overlap,
     )
 
     upsampled = upsampler.enhance(np.array(image), outscale=RealESRGAN_upscaling)[0]
diff --git a/modules/scripts.py b/modules/scripts.py
index 89a0618d..74591bab 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -6,6 +6,7 @@ import modules.ui as ui
 import gradio as gr
 
 from modules.processing import StableDiffusionProcessing
+from modules import shared
 
 class Script:
     filename = None
@@ -137,6 +138,8 @@ class ScriptRunner:
         script_args = args[script.args_from:script.args_to]
         processed = script.run(p, *script_args)
 
+        shared.total_tqdm.clear()
+
         return processed
 
 
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 1084e248..db9952a5 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -73,11 +73,21 @@ class StableDiffusionModelHijack:
             name = os.path.splitext(filename)[0]
 
             data = torch.load(path)
-            param_dict = data['string_to_param']
-            if hasattr(param_dict, '_parameters'):
-                param_dict = getattr(param_dict, '_parameters')  # fix for torch 1.12.1 loading saved file from torch 1.11
-            assert len(param_dict) == 1, 'embedding file has multiple terms in it'
-            emb = next(iter(param_dict.items()))[1]
+
+            # textual inversion embeddings
+            if 'string_to_param' in data:
+                param_dict = data['string_to_param']
+                if hasattr(param_dict, '_parameters'):
+                    param_dict = getattr(param_dict, '_parameters')  # fix for torch 1.12.1 loading saved file from torch 1.11
+                assert len(param_dict) == 1, 'embedding file has multiple terms in it'
+                emb = next(iter(param_dict.items()))[1]
+            elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:
+                assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
+
+                emb = next(iter(data.values()))
+                if len(emb.shape) == 1:
+                    emb = emb.unsqueeze(0)
+
             self.word_embeddings[name] = emb.detach()
             self.word_embeddings_checksums[name] = f'{const_hash(emb.reshape(-1))&0xffff:04x}'
 
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 140b5dea..6b7979e2 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -70,13 +70,14 @@ def extended_tdqm(sequence, *args, desc=None, **kwargs):
     state.sampling_steps = len(sequence)
     state.sampling_step = 0
 
-    for x in tqdm.tqdm(sequence, *args, desc=state.job, **kwargs):
+    for x in tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs):
         if state.interrupted:
             break
 
         yield x
 
         state.sampling_step += 1
+        shared.total_tqdm.update()
 
 
 ldm.models.diffusion.ddim.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs)
@@ -86,7 +87,7 @@ ldm.models.diffusion.plms.tqdm = lambda *args, desc=None, **kwargs: extended_tdq
 class VanillaStableDiffusionSampler:
     def __init__(self, constructor, sd_model):
         self.sampler = constructor(sd_model)
-        self.orig_p_sample_ddim = self.sampler.p_sample_ddim if hasattr(self.sampler, 'p_sample_ddim') else None
+        self.orig_p_sample_ddim = self.sampler.p_sample_ddim if hasattr(self.sampler, 'p_sample_ddim') else self.sampler.p_sample_plms
         self.mask = None
         self.nmask = None
         self.init_latent = None
@@ -112,6 +113,13 @@ class VanillaStableDiffusionSampler:
         return samples
 
     def sample(self, p, x, conditioning, unconditional_conditioning):
+        for fieldname in ['p_sample_ddim', 'p_sample_plms']:
+            if hasattr(self.sampler, fieldname):
+                setattr(self.sampler, fieldname, lambda x_dec, cond, ts, *args, **kwargs: p_sample_ddim_hook(self, x_dec, cond, ts, *args, **kwargs))
+        self.mask = None
+        self.nmask = None
+        self.init_latent = None
+
         samples_ddim, _ = self.sampler.sample(S=p.steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x)
         return samples_ddim
 
@@ -146,13 +154,14 @@ def extended_trange(count, *args, **kwargs):
     state.sampling_steps = count
     state.sampling_step = 0
 
-    for x in tqdm.trange(count, *args, desc=state.job, **kwargs):
+    for x in tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs):
         if state.interrupted:
             break
 
         yield x
 
         state.sampling_step += 1
+        shared.total_tqdm.update()
 
 
 class KDiffusionSampler:
@@ -168,6 +177,7 @@ class KDiffusionSampler:
     def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning):
         t_enc = int(min(p.denoising_strength, 0.999) * p.steps)
         sigmas = self.model_wrap.get_sigmas(p.steps)
+
         noise = noise * sigmas[p.steps - t_enc - 1]
 
         xi = x + noise
diff --git a/modules/shared.py b/modules/shared.py
index de7cbf02..07b288c2 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -1,9 +1,11 @@
+import sys
 import argparse
 import json
 import os
 
 import gradio as gr
 import torch
+import tqdm
 
 import modules.artists
 from modules.paths import script_path, sd_path
@@ -118,12 +120,13 @@ class Options:
         "font": OptionInfo(find_any_font(), "Font for image grids  that have text"),
         "enable_emphasis": OptionInfo(True, "Use (text) to make model pay more attention to text text and [text] to make it pay less attention"),
         "save_txt": OptionInfo(False, "Create a text file next to every image with generation parameters."),
-        "ESRGAN_tile": OptionInfo(192, "Tile size for ESRGAN upscaling. 0 = no tiling.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}),
-        "ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for ESRGAN upscaling. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}),
+        "ESRGAN_tile": OptionInfo(192, "Tile size for upscaling. 0 = no tiling.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}),
+        "ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for upscaling. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}),
         "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}),
         "upscale_at_full_resolution_padding": OptionInfo(16, "Inpainting at full resolution: padding, in pixels, for the masked region.", gr.Slider, {"minimum": 0, "maximum": 128, "step": 4}),
         "show_progressbar": OptionInfo(True, "Show progressbar"),
         "show_progress_every_n_steps": OptionInfo(0, "Show show image creation progress every N sampling steps. Set 0 to disable.", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}),
+        "multiple_tqdm": OptionInfo(True, "Add a second progress bar to the console that shows progress for an entire job. Broken in PyCharm console."),
         "face_restoration_model": OptionInfo(None, "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}),
         "code_former_weight": OptionInfo(0.5, "CodeFormer weight parameter; 0 = maximum effect; 1 = minimum effect", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}),
     }
@@ -165,4 +168,32 @@ sd_upscalers = []
 
 sd_model = None
 
+progress_print_out = sys.stdout
 
+
+class TotalTQDM:
+    def __init__(self):
+        self._tqdm = None
+
+    def reset(self):
+        self._tqdm = tqdm.tqdm(
+            desc="Total progress",
+            total=state.job_count * state.sampling_steps,
+            position=1,
+            file=progress_print_out
+        )
+
+    def update(self):
+        if not opts.multiple_tqdm:
+            return
+        if self._tqdm is None:
+            self.reset()
+        self._tqdm.update()
+
+    def clear(self):
+        if self._tqdm is not None:
+            self._tqdm.close()
+            self._tqdm = None
+
+
+total_tqdm = TotalTQDM()
diff --git a/modules/txt2img.py b/modules/txt2img.py
index fd81ff0f..410a7a7b 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -25,6 +25,7 @@ def txt2img(prompt: str, negative_prompt: str, steps: int, sampler_index: int, r
         tiling=tiling,
     )
 
+    print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)
     processed = modules.scripts.scripts_txt2img.run(p, *args)
 
     if processed is not None:
@@ -32,5 +33,7 @@ def txt2img(prompt: str, negative_prompt: str, steps: int, sampler_index: int, r
     else:
         processed = process_images(p)
 
+    shared.total_tqdm.clear()
+
     return processed.images, processed.js(), plaintext_to_html(processed.info)
 
diff --git a/modules/ui.py b/modules/ui.py
index 5eb12b02..ccd2d853 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -349,7 +349,8 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
 
                 with gr.Group():
                     cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.0)
-                    denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising Strength', value=0.75)
+                    denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75)
+                    denoising_strength_change_factor = gr.Slider(minimum=0.9, maximum=1.1, step=0.01, label='Denoising strength change factor', value=1, visible=False)
 
                 with gr.Group():
                     height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
@@ -396,6 +397,7 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
                     sd_upscale_overlap: gr_show(is_upscale),
                     inpaint_full_res: gr_show(is_inpaint),
                     inpainting_mask_invert: gr_show(is_inpaint),
+                    denoising_strength_change_factor: gr_show(is_loopback),
                 }
 
             switch_mode.change(
@@ -412,6 +414,7 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
                     sd_upscale_overlap,
                     inpaint_full_res,
                     inpainting_mask_invert,
+                    denoising_strength_change_factor,
                 ]
             )
 
@@ -433,6 +436,7 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
                     batch_size,
                     cfg_scale,
                     denoising_strength,
+                    denoising_strength_change_factor,
                     seed,
                     height,
                     width,
diff --git a/script.js b/script.js
index f2cd8877..c1143a8a 100644
--- a/script.js
+++ b/script.js
@@ -1,8 +1,8 @@
 titles = {
-    "Sampling steps": "How many times to imptove the generated image itratively; higher values take longer; very low values can produce bad results",
+    "Sampling steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results",
     "Sampling method": "Which algorithm to use to produce the image",
 	"GFPGAN": "Restore low quality faces using GFPGAN neural network",
-	"Euler a": "Euler Ancestral - very creative, each can get acompletely different pictures depending on step count, setting seps tohigher than 30-40 does not help",
+	"Euler a": "Euler Ancestral - very creative, each can get a completely different picture depending on step count, setting steps to higher than 30-40 does not help",
 	"DDIM": "Denoising Diffusion Implicit Models - best at inpainting",
 
 	"Batch count": "How many batches of images to create",
@@ -11,7 +11,7 @@ titles = {
     "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result",
 
     "Inpaint a part of image": "Draw a mask over an image, and the script will regenerate the masked area with content according to prompt",
-    "Loopback": "Process an image, use it as an input, repeat. Batch count determings number of iterations.",
+    "Loopback": "Process an image, use it as an input, repeat. Batch count determines number of iterations.",
     "SD upscale": "Upscale image normally, split result into tiles, improve each tile using img2img, merge whole image back",
 
     "Just resize": "Resize image to target resolution. Unless height and width match, you will get incorrect aspect ratio.",
@@ -26,7 +26,8 @@ titles = {
     "latent nothing": "fill it with latent space zeroes",
     "Inpaint at full resolution": "Upscale masked region to target resolution, do inpainting, downscale back and paste into original image",
 
-    "Denoising Strength": "Determines how little respect the algorithm should have for image's content. At 0, nothing will change, and at 1 you'll get an unrelated image.",
+    "Denoising strength": "Determines how little respect the algorithm should have for image's content. At 0, nothing will change, and at 1 you'll get an unrelated image.",
+    "Denoising strength change factor": "In loopback mode, on each loop the denoising strength is multiplied by this value. <1 means decreasing variety so your sequence will converge on a fixed picture. >1 means increasing variety so your sequence will become more and more chaotic.",
 
     "Interrupt": "Stop processing images and return any results accumulated so far.",
     "Save": "Write image to a directory (default - log/images) and generation parameters into csv file.",
@@ -36,13 +37,13 @@ titles = {
 
     "None": "Do not do anything special",
     "Prompt matrix": "Separate prompts into parts using vertical pipe character (|) and the script will create a picture for every combination of them (except for the first part, which will be present in all combinations)",
-    "X/Y plot": "Create a grid where images will have different parameters. Use inputs below to specify which parameterswill be shared by columns and rows",
-    "Custom code": "Run python code. Advanced user only. Must run program with --allow-code for this to work",
+    "X/Y plot": "Create a grid where images will have different parameters. Use inputs below to specify which parameters will be shared by columns and rows",
+    "Custom code": "Run Python code. Advanced user only. Must run program with --allow-code for this to work",
 
     "Prompt S/R": "Separate a list of words with commas, and the first word will be used as a keyword: script will search for this word in the prompt, and replace it with others",
 
     "Tiling": "Produce an image that can be tiled.",
-    "Tile overlap": "For SD upscale, how much overlap in pixels should there be between tiles. Tils overlap so that when they are merged back into one oicture, there is no clearly visible seam.",
+    "Tile overlap": "For SD upscale, how much overlap in pixels should there be between tiles. Tiles overlap so that when they are merged back into one picture, there is no clearly visible seam.",
 
     "Roll": "Add a random artist to the prompt.",
 }
diff --git a/webui.py b/webui.py
index d846b843..c9421eec 100644
--- a/webui.py
+++ b/webui.py
@@ -153,6 +153,7 @@ def wrap_gradio_gpu_call(func):
 
     return modules.ui.wrap_gradio_call(f)
 
+modules.scripts.load_scripts(os.path.join(script_path, "scripts"))
 
 try:
     # this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start.
@@ -174,8 +175,6 @@ else:
 
 modules.sd_hijack.model_hijack.hijack(shared.sd_model)
 
-modules.scripts.load_scripts(os.path.join(script_path, "scripts"))
-
 
 def webui():
     # make the program just exit at ctrl+c without waiting for anything
@@ -194,6 +193,5 @@ def webui():
 
     demo.launch(share=cmd_opts.share, server_name="0.0.0.0" if cmd_opts.listen else None, server_port=cmd_opts.port)
 
-
 if __name__ == "__main__":
     webui()