aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--modules/sd_samplers.py26
-rw-r--r--modules/shared.py1
-rw-r--r--modules/ui.py5
3 files changed, 30 insertions, 2 deletions
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index f58a29b9..74a480e5 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -7,7 +7,7 @@ import inspect
import k_diffusion.sampling
import ldm.models.diffusion.ddim
import ldm.models.diffusion.plms
-from modules import prompt_parser, devices, processing
+from modules import prompt_parser, devices, processing, images
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
@@ -89,6 +89,30 @@ def sample_to_image(samples):
x_sample = x_sample.astype(np.uint8)
return Image.fromarray(x_sample)
+def samples_to_image_grid(samples):
+ progress_images = []
+ for i in range(len(samples)):
+ # Decode the samples individually to reduce VRAM usage at the cost of a bit of speed.
+ x_sample = processing.decode_first_stage(shared.sd_model, samples[i:i+1])[0]
+ x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0)
+ x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
+ x_sample = x_sample.astype(np.uint8)
+ progress_images.append(Image.fromarray(x_sample))
+
+ return images.image_grid(progress_images)
+
+def samples_to_image_grid_combined(samples):
+ progress_images = []
+ # Decode all samples at once to increase speed at the cost of VRAM usage.
+ x_samples = processing.decode_first_stage(shared.sd_model, samples)
+ x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
+
+ for x_sample in x_samples:
+ x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
+ x_sample = x_sample.astype(np.uint8)
+ progress_images.append(Image.fromarray(x_sample))
+
+ return images.image_grid(progress_images)
def store_latent(decoded):
state.current_latent = decoded
diff --git a/modules/shared.py b/modules/shared.py
index d9cb65ef..95d6e225 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -294,6 +294,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"),
options_templates.update(options_section(('ui', "User interface"), {
"show_progressbar": OptionInfo(True, "Show progressbar"),
"show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. Set 0 to disable.", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}),
+    "progress_decode_combined": OptionInfo(False, "Decode all progress images at once. (Slightly speeds up progress generation but consumes significantly more VRAM with large batches.)"),
"return_grid": OptionInfo(True, "Show grid in results for web"),
"do_not_show_images": OptionInfo(False, "Do not show any images in results for web"),
"add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"),
diff --git a/modules/ui.py b/modules/ui.py
index 56c233ab..de0abc7e 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -318,7 +318,10 @@ def check_progress_call(id_part):
if shared.parallel_processing_allowed:
if shared.state.sampling_step - shared.state.current_image_sampling_step >= opts.show_progress_every_n_steps and shared.state.current_latent is not None:
- shared.state.current_image = modules.sd_samplers.sample_to_image(shared.state.current_latent)
+ if opts.progress_decode_combined:
+ shared.state.current_image = modules.sd_samplers.samples_to_image_grid_combined(shared.state.current_latent)
+ else:
+ shared.state.current_image = modules.sd_samplers.samples_to_image_grid(shared.state.current_latent)
shared.state.current_image_sampling_step = shared.state.sampling_step
image = shared.state.current_image