diff options
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | webui.py | 12 |
2 files changed, 22 insertions, 4 deletions
@@ -248,3 +248,17 @@ print("Seed was: " + str(processed.seed)) display(processed.images, processed.seed, processed.info)
```
+
+### `--lowvram`
+Optimizations for GPUs with low VRAM. This should make it possible to generate 512x512 images on video cards with 4GB memory.
+
+The original idea behind these optimizations is by basujindal: https://github.com/basujindal/stable-diffusion. Model is separated into modules,
+and only one module is kept in GPU memory; when another module needs to run, the previous is removed from GPU memory.
+
+As should be expected, the nature of these optimizations makes processing run slower -- about 10 times slower
+compared to normal operation on my RTX 3090.
+
+This is an independent implementation that does not require any modification to original Stable Diffusion code, and
+with all code concentrated in one place rather than scattered around the program.
+
+
@@ -54,6 +54,7 @@ parser.add_argument("--max-batch-count", type=int, default=16, help="maximum bat parser.add_argument("--embeddings-dir", type=str, default='embeddings', help="embeddings dirtectory for textual inversion (default: embeddings)")
parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui")
parser.add_argument("--lowvram", action='store_true', help="enamble optimizations for low vram")
+parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast")
cmd_opts = parser.parse_args()
@@ -273,8 +274,9 @@ def create_random_tensors(shape, seeds): def torch_gc():
- torch.cuda.empty_cache()
- torch.cuda.ipc_collect()
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
def save_image(image, path, basename, seed=None, prompt=None, extension='png', info=None, short_filename=False):
@@ -528,6 +530,7 @@ def draw_xy_grid(xs, ys, x_label, y_label, cell): return grid
+
def resize_image(resize_mode, im, width, height):
if resize_mode == 0:
res = im.resize((width, height), resample=LANCZOS)
@@ -747,7 +750,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): if len(used_custom_terms) > 0:
self.hijack.comments.append("Used custom terms: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
- tokens = torch.asarray(remade_batch_tokens).to(self.wrapped.device)
+ tokens = torch.asarray(remade_batch_tokens).to(device)
outputs = self.wrapped.transformer(input_ids=tokens)
z = outputs.last_hidden_state
@@ -906,8 +909,9 @@ def process_images(p: StableDiffusionProcessing) -> Processed: model_hijack.load_textual_inversion_embeddings(cmd_opts.embeddings_dir, model)
output_images = []
+ precision_scope = autocast if cmd_opts.precision == "autocast" else nullcontext
ema_scope = (nullcontext if cmd_opts.lowvram else model.ema_scope)
- with torch.no_grad(), autocast("cuda"), ema_scope():
+ with torch.no_grad(), precision_scope("cuda"), ema_scope():
p.init()
for n in range(p.n_iter):
|