4 files changed, 58 insertions, 34 deletions
diff --git a/modules/processing.py b/modules/processing.py
index fd7c7015..47712159 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -76,6 +76,24 @@ def apply_overlay(image, paste_loc, index, overlays):
     return image
 
 
+def txt2img_image_conditioning(sd_model, x, width, height):
+    if sd_model.model.conditioning_key not in {'hybrid', 'concat'}:
+        # Dummy zero conditioning if we're not using inpainting model.
+        # Still takes up a bit of memory, but no encoder call.
+        # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
+        return x.new_zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
+
+    # The "masked-image" in this case will just be all zeros since the entire image is masked.
+    image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
+    image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
+
+    # Add the fake full 1s mask to the first dimension.
+    image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+    image_conditioning = image_conditioning.to(x.dtype)
+
+    return image_conditioning
+
+
 class StableDiffusionProcessing():
     """
     The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing
@@ -136,28 +154,12 @@ class StableDiffusionProcessing():
         self.all_negative_prompts = None
         self.all_seeds = None
         self.all_subseeds = None
+        self.iteration = 0
 
     def txt2img_image_conditioning(self, x, width=None, height=None):
-        if self.sampler.conditioning_key not in {'hybrid', 'concat'}:
-            # Dummy zero conditioning if we're not using inpainting model.
-            # Still takes up a bit of memory, but no encoder call.
-            # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
-            return x.new_zeros(x.shape[0], 5, 1, 1)
-
-        self.is_using_inpainting_conditioning = True
-
-        height = height or self.height
-        width = width or self.width
-
-        # The "masked-image" in this case will just be all zeros since the entire image is masked.
-        image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
-        image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
+        self.is_using_inpainting_conditioning = self.sd_model.model.conditioning_key in {'hybrid', 'concat'}
 
-        # Add the fake full 1s mask to the first dimension.
-        image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
-        image_conditioning = image_conditioning.to(x.dtype)
-
-        return image_conditioning
+        return txt2img_image_conditioning(self.sd_model, x, width or self.width, height or self.height)
 
     def depth2img_image_conditioning(self, source_image):
         # Use the AddMiDaS helper to Format our source image to suit the MiDaS model
@@ -420,7 +422,7 @@ def fix_seed(p):
     p.subseed = get_fixed_seed(p.subseed)
 
 
-def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration=0, position_in_batch=0):
+def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0):
     index = position_in_batch + iteration * p.batch_size
 
     clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers)
@@ -544,6 +546,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             state.job_count = p.n_iter
 
         for n in range(p.n_iter):
+            p.iteration = n
+
             if state.skipped:
                 state.skipped = False
 
@@ -707,7 +711,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             if not isinstance(image, Image.Image):
                 image = sd_samplers.sample_to_image(image, index, approximation=0)
 
-            images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, suffix="-before-highres-fix")
+            info = create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index)
+            images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, suffix="-before-highres-fix")
 
         if latent_scale_mode is not None:
             for i in range(samples.shape[0]):
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index edcbaf52..fa2cd4bb 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -35,26 +35,35 @@ def apply_optimizations():
 
     ldm.modules.diffusionmodules.model.nonlinearity = silu
     ldm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th
+    
+    optimization_method = None
 
     if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)):
         print("Applying xformers cross attention optimization.")
         ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward
         ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward
+        optimization_method = 'xformers'
     elif cmd_opts.opt_split_attention_v1:
         print("Applying v1 cross attention optimization.")
         ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1
+        optimization_method = 'V1'
     elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()):
         if not invokeAI_mps_available and shared.device.type == 'mps':
             print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.")
             print("Applying v1 cross attention optimization.")
             ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1
+            optimization_method = 'V1'
         else:
             print("Applying cross attention optimization (InvokeAI).")
             ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI
+            optimization_method = 'InvokeAI'
     elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()):
         print("Applying cross attention optimization (Doggettx).")
         ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward
         ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.cross_attention_attnblock_forward
+        optimization_method = 'Doggettx'
+
+    return optimization_method
 
 
 def undo_optimizations():
@@ -75,6 +84,7 @@ class StableDiffusionModelHijack:
     layers = None
     circular_enabled = False
     clip = None
+    optimization_method = None
 
     embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir)
 
@@ -94,7 +104,7 @@ class StableDiffusionModelHijack:
             m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self)
             m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
 
-        apply_optimizations()
+        self.optimization_method = apply_optimizations()
 
         self.clip = m.cond_stage_model
         
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 214db01c..2250e41b 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -251,6 +251,7 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat
     if save_model_every or create_image_every:
         assert log_directory, "Log directory is empty"
 
+
 def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
     save_embedding_every = save_embedding_every or 0
     create_image_every = create_image_every or 0
@@ -325,7 +326,6 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
         else:
             print("No saved optimizer exists in checkpoint")
 
-
     scaler = torch.cuda.amp.GradScaler()
 
     batch_size = ds.batch_size
@@ -341,6 +341,9 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     forced_filename = "<none>"
     embedding_yet_to_be_embedded = False
 
+    is_training_inpainting_model = shared.sd_model.model.conditioning_key in {'hybrid', 'concat'}
+    img_c = None
+
     pbar = tqdm.tqdm(total=steps - initial_step)
     try:
         for i in range((steps-initial_step) * gradient_step):
@@ -359,13 +362,18 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
                     break
 
                 with devices.autocast():
-                    # c = stack_conds(batch.cond).to(devices.device)
-                    # mask = torch.tensor(batch.emb_index).to(devices.device, non_blocking=pin_memory)
-                    # print(mask)
-                    # c[:, 1:1+embedding.vec.shape[0]] = embedding.vec.to(devices.device, non_blocking=pin_memory)
                     x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
                     c = shared.sd_model.cond_stage_model(batch.cond_text)
-                    loss = shared.sd_model(x, c)[0] / gradient_step
+
+                    if is_training_inpainting_model:
+                        if img_c is None:
+                            img_c = processing.txt2img_image_conditioning(shared.sd_model, c, training_width, training_height)
+
+                        cond = {"c_concat": [img_c], "c_crossattn": [c]}
+                    else:
+                        cond = c
+
+                    loss = shared.sd_model(x, cond)[0] / gradient_step
                     del x
 
                     _loss_step += loss.item()
diff --git a/modules/ui.py b/modules/ui.py
index bfc93634..184af7ad 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -162,16 +162,14 @@ def save_files(js_data, images, do_make_zip, index):
     return gr.File.update(value=fullfns, visible=True), plaintext_to_html(f"Saved: {filenames[0]}")
 
 
-
-
-def calc_time_left(progress, threshold, label, force_display):
+def calc_time_left(progress, threshold, label, force_display, show_eta):
     if progress == 0:
         return ""
     else:
         time_since_start = time.time() - shared.state.time_start
         eta = (time_since_start/progress)
         eta_relative = eta-time_since_start
-        if (eta_relative > threshold and progress > 0.02) or force_display:
+        if (eta_relative > threshold and show_eta) or force_display:
             if eta_relative > 3600:
                 return label + time.strftime('%H:%M:%S', time.gmtime(eta_relative))
             elif eta_relative > 60:
@@ -193,7 +191,10 @@ def check_progress_call(id_part):
     if shared.state.sampling_steps > 0:
         progress += 1 / shared.state.job_count * shared.state.sampling_step / shared.state.sampling_steps
 
-    time_left = calc_time_left( progress, 1, " ETA: ", shared.state.time_left_force_display )
+    # Show progress percentage and time left at the same moment, and base it also on steps done
+    show_eta = progress >= 0.01 or shared.state.sampling_step >= 10
+
+    time_left = calc_time_left(progress, 1, " ETA: ", shared.state.time_left_force_display, show_eta)
     if time_left != "":
         shared.state.time_left_force_display = True
 
@@ -201,7 +202,7 @@ def check_progress_call(id_part):
 
     progressbar = ""
     if opts.show_progressbar:
-        progressbar = f"""<div class='progressDiv'><div class='progress' style="overflow:visible;width:{progress * 100}%;white-space:nowrap;">{"&nbsp;" * 2 + str(int(progress*100))+"%" + time_left if progress > 0.01 else ""}</div></div>"""
+        progressbar = f"""<div class='progressDiv'><div class='progress' style="overflow:visible;width:{progress * 100}%;white-space:nowrap;">{"&nbsp;" * 2 + str(int(progress*100))+"%" + time_left if show_eta else ""}</div></div>"""
 
     image = gr_show(False)
     preview_visibility = gr_show(False)