-rw-r--r--  launch.py                                        |   4
-rw-r--r--  modules/esrgan_model.py                          |   4
-rw-r--r--  modules/img2img.py                               |   4
-rw-r--r--  modules/modelloader.py                           |  19
-rw-r--r--  modules/scunet_model.py                          |  90
-rw-r--r--  modules/scunet_model_arch.py                     | 265
-rw-r--r--  modules/sd_hijack.py                             |   8
-rw-r--r--  modules/sd_samplers.py                           |   8
-rw-r--r--  modules/shared.py                                |   8
-rw-r--r--  modules/textual_inversion/textual_inversion.py   |  29
-rw-r--r--  modules/textual_inversion/ui.py                  |   4
-rw-r--r--  modules/txt2img.py                               |   4
-rw-r--r--  modules/ui.py                                    |   2
-rw-r--r--  requirements.txt                                 |   2
-rw-r--r--  requirements_versions.txt                        |   1
-rw-r--r--  webui.py                                         |  17
16 files changed, 429 insertions(+), 40 deletions(-)
diff --git a/launch.py b/launch.py
index d2793ed2..57405fea 100644
--- a/launch.py
+++ b/launch.py
@@ -15,6 +15,7 @@ requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt")
commandline_args = os.environ.get('COMMANDLINE_ARGS', "")
gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
+clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc")
taming_transformers_commit_hash = os.environ.get('TAMING_TRANSFORMERS_COMMIT_HASH', "24268930bf1dce879235a7fddd0b2355b84d7ea6")
@@ -111,6 +112,9 @@ if not skip_torch_cuda_test:
if not is_installed("gfpgan"):
run_pip(f"install {gfpgan_package}", "gfpgan")
+if not is_installed("clip"):
+ run_pip(f"install {clip_package}", "clip")
+
os.makedirs(dir_repos, exist_ok=True)
git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)
diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py
index ea91abfe..4aed9283 100644
--- a/modules/esrgan_model.py
+++ b/modules/esrgan_model.py
@@ -73,8 +73,8 @@ def fix_model_layers(crt_model, pretrained_net):
class UpscalerESRGAN(Upscaler):
def __init__(self, dirname):
self.name = "ESRGAN"
- self.model_url = "https://drive.google.com/u/0/uc?id=1TPrz5QKd8DHHt1k8SRtm6tMiPjz_Qene&export=download"
- self.model_name = "ESRGAN 4x"
+ self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/ESRGAN.pth"
+ self.model_name = "ESRGAN_4x"
self.scalers = []
self.user_path = dirname
self.model_path = os.path.join(models_path, self.name)
diff --git a/modules/img2img.py b/modules/img2img.py
index 03e934e9..f4455c90 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -103,7 +103,9 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
inpaint_full_res_padding=inpaint_full_res_padding,
inpainting_mask_invert=inpainting_mask_invert,
)
- print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
+
+ if shared.cmd_opts.enable_console_prompts:
+ print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
p.extra_generation_params["Mask blur"] = mask_blur
diff --git a/modules/modelloader.py b/modules/modelloader.py
index 015aeafa..b0f2f33d 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -5,7 +5,6 @@ import importlib
from urllib.parse import urlparse
from basicsr.utils.download_util import load_file_from_url
-
from modules import shared
from modules.upscaler import Upscaler
from modules.paths import script_path, models_path
@@ -121,16 +120,30 @@ def move_files(src_path: str, dest_path: str, ext_filter: str = None):
def load_upscalers():
+ sd = shared.script_path
+    # We can only use this 'magic' method to dynamically load upscalers if they are referenced,
+    # so we'll try to import any _model.py files before looking in __subclasses__
+ modules_dir = os.path.join(sd, "modules")
+ for file in os.listdir(modules_dir):
+ if "_model.py" in file:
+ model_name = file.replace("_model.py", "")
+ full_model = f"modules.{model_name}_model"
+ try:
+ importlib.import_module(full_model)
+ except:
+ pass
datas = []
+ c_o = vars(shared.cmd_opts)
for cls in Upscaler.__subclasses__():
name = cls.__name__
module_name = cls.__module__
module = importlib.import_module(module_name)
class_ = getattr(module, name)
- cmd_name = f"{name.lower().replace('upscaler', '')}-models-path"
+ cmd_name = f"{name.lower().replace('upscaler', '')}_models_path"
opt_string = None
try:
- opt_string = shared.opts.__getattr__(cmd_name)
+ if cmd_name in c_o:
+ opt_string = c_o[cmd_name]
except:
pass
scaler = class_(opt_string)
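
Note: the rewritten load_upscalers() relies on import side effects: every modules/*_model.py is imported so each Upscaler subclass gets defined, then Upscaler.__subclasses__() is walked and each class is constructed with the models path taken from the matching <name>_models_path command-line option. A stripped-down sketch of that discovery pattern; the Upscaler base class and the cmd_opts dict here are stand-ins for the real ones:

import importlib
import os

class Upscaler:
    def __init__(self, user_path=None):
        self.user_path = user_path

def load_upscalers(modules_dir: str, cmd_opts: dict):
    # Import every foo_model.py so its Upscaler subclass is registered.
    for file in os.listdir(modules_dir):
        if file.endswith("_model.py"):
            try:
                importlib.import_module(f"modules.{file[:-3]}")
            except Exception:
                pass  # an optional upscaler with missing dependencies should not break startup

    upscalers = []
    for cls in Upscaler.__subclasses__():
        # e.g. UpscalerESRGAN -> "esrgan_models_path"
        opt_name = f"{cls.__name__.lower().replace('upscaler', '')}_models_path"
        upscalers.append(cls(cmd_opts.get(opt_name)))
    return upscalers
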
diff --git a/modules/scunet_model.py b/modules/scunet_model.py
new file mode 100644
index 00000000..7987ac14
--- /dev/null
+++ b/modules/scunet_model.py
@@ -0,0 +1,90 @@
+import os.path
+import sys
+import traceback
+
+import PIL.Image
+import numpy as np
+import torch
+from basicsr.utils.download_util import load_file_from_url
+
+import modules.upscaler
+from modules import shared, modelloader
+from modules.paths import models_path
+from modules.scunet_model_arch import SCUNet as net
+
+
+class UpscalerScuNET(modules.upscaler.Upscaler):
+ def __init__(self, dirname):
+ self.name = "ScuNET"
+ self.model_path = os.path.join(models_path, self.name)
+ self.model_name = "ScuNET GAN"
+ self.model_name2 = "ScuNET PSNR"
+ self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_gan.pth"
+ self.model_url2 = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_psnr.pth"
+ self.user_path = dirname
+ super().__init__()
+ model_paths = self.find_models(ext_filter=[".pth"])
+ scalers = []
+ add_model2 = True
+ for file in model_paths:
+ if "http" in file:
+ name = self.model_name
+ else:
+ name = modelloader.friendly_name(file)
+ if name == self.model_name2 or file == self.model_url2:
+ add_model2 = False
+ try:
+ scaler_data = modules.upscaler.UpscalerData(name, file, self, 4)
+ scalers.append(scaler_data)
+ except Exception:
+ print(f"Error loading ScuNET model: {file}", file=sys.stderr)
+ print(traceback.format_exc(), file=sys.stderr)
+ if add_model2:
+ scaler_data2 = modules.upscaler.UpscalerData(self.model_name2, self.model_url2, self)
+ scalers.append(scaler_data2)
+ self.scalers = scalers
+
+ def do_upscale(self, img: PIL.Image, selected_file):
+ torch.cuda.empty_cache()
+
+ model = self.load_model(selected_file)
+ if model is None:
+ return img
+
+ device = shared.device
+ img = np.array(img)
+ img = img[:, :, ::-1]
+ img = np.moveaxis(img, 2, 0) / 255
+ img = torch.from_numpy(img).float()
+ img = img.unsqueeze(0).to(shared.device)
+
+ img = img.to(device)
+ with torch.no_grad():
+ output = model(img)
+ output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
+ output = 255. * np.moveaxis(output, 0, 2)
+ output = output.astype(np.uint8)
+ output = output[:, :, ::-1]
+ torch.cuda.empty_cache()
+ return PIL.Image.fromarray(output, 'RGB')
+
+ def load_model(self, path: str):
+ device = shared.device
+ if "http" in path:
+ filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name,
+ progress=True)
+ else:
+ filename = path
+ if not os.path.exists(os.path.join(self.model_path, filename)) or filename is None:
+ print(f"ScuNET: Unable to load model from {filename}", file=sys.stderr)
+ return None
+
+ model = net(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64)
+ model.load_state_dict(torch.load(filename), strict=True)
+ model.eval()
+ for k, v in model.named_parameters():
+ v.requires_grad = False
+ model = model.to(device)
+
+ return model
+
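
Note: do_upscale() above converts the PIL image to a normalized, channels-first BGR tensor, runs the network with gradients disabled, and converts the result back. A self-contained sketch of that round-trip; the identity model stands in for a loaded SCUNet:

import numpy as np
import PIL.Image
import torch

def upscale_with(model: torch.nn.Module, img: PIL.Image.Image, device="cpu") -> PIL.Image.Image:
    arr = np.array(img)[:, :, ::-1]                   # RGB -> BGR
    arr = np.moveaxis(arr, 2, 0) / 255.0              # HWC -> CHW, scale to [0, 1]
    tensor = torch.from_numpy(arr).float().unsqueeze(0).to(device)  # add batch dimension

    with torch.no_grad():
        out = model(tensor)

    out = out.squeeze(0).float().cpu().clamp_(0, 1).numpy()
    out = (255.0 * np.moveaxis(out, 0, 2)).astype(np.uint8)[:, :, ::-1]  # CHW -> HWC, BGR -> RGB
    return PIL.Image.fromarray(out, 'RGB')

# result = upscale_with(torch.nn.Identity(), PIL.Image.open("input.png").convert("RGB"))
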
diff --git a/modules/scunet_model_arch.py b/modules/scunet_model_arch.py
new file mode 100644
index 00000000..972a2639
--- /dev/null
+++ b/modules/scunet_model_arch.py
@@ -0,0 +1,265 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import torch
+import torch.nn as nn
+from einops import rearrange
+from einops.layers.torch import Rearrange
+from timm.models.layers import trunc_normal_, DropPath
+
+
+class WMSA(nn.Module):
+ """ Self-attention module in Swin Transformer
+ """
+
+ def __init__(self, input_dim, output_dim, head_dim, window_size, type):
+ super(WMSA, self).__init__()
+ self.input_dim = input_dim
+ self.output_dim = output_dim
+ self.head_dim = head_dim
+ self.scale = self.head_dim ** -0.5
+ self.n_heads = input_dim // head_dim
+ self.window_size = window_size
+ self.type = type
+ self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True)
+
+ self.relative_position_params = nn.Parameter(
+ torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads))
+
+ self.linear = nn.Linear(self.input_dim, self.output_dim)
+
+ trunc_normal_(self.relative_position_params, std=.02)
+ self.relative_position_params = torch.nn.Parameter(
+ self.relative_position_params.view(2 * window_size - 1, 2 * window_size - 1, self.n_heads).transpose(1,
+ 2).transpose(
+ 0, 1))
+
+ def generate_mask(self, h, w, p, shift):
+ """ generating the mask of SW-MSA
+ Args:
+ shift: shift parameters in CyclicShift.
+ Returns:
+ attn_mask: should be (1 1 w p p),
+ """
+        # supporting square.
+ attn_mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device)
+ if self.type == 'W':
+ return attn_mask
+
+ s = p - shift
+ attn_mask[-1, :, :s, :, s:, :] = True
+ attn_mask[-1, :, s:, :, :s, :] = True
+ attn_mask[:, -1, :, :s, :, s:] = True
+ attn_mask[:, -1, :, s:, :, :s] = True
+ attn_mask = rearrange(attn_mask, 'w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)')
+ return attn_mask
+
+ def forward(self, x):
+ """ Forward pass of Window Multi-head Self-attention module.
+ Args:
+ x: input tensor with shape of [b h w c];
+ attn_mask: attention mask, fill -inf where the value is True;
+ Returns:
+ output: tensor shape [b h w c]
+ """
+ if self.type != 'W': x = torch.roll(x, shifts=(-(self.window_size // 2), -(self.window_size // 2)), dims=(1, 2))
+ x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size)
+ h_windows = x.size(1)
+ w_windows = x.size(2)
+        # square validation
+ # assert h_windows == w_windows
+
+ x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size)
+ qkv = self.embedding_layer(x)
+ q, k, v = rearrange(qkv, 'b nw np (threeh c) -> threeh b nw np c', c=self.head_dim).chunk(3, dim=0)
+ sim = torch.einsum('hbwpc,hbwqc->hbwpq', q, k) * self.scale
+ # Adding learnable relative embedding
+ sim = sim + rearrange(self.relative_embedding(), 'h p q -> h 1 1 p q')
+ # Using Attn Mask to distinguish different subwindows.
+ if self.type != 'W':
+ attn_mask = self.generate_mask(h_windows, w_windows, self.window_size, shift=self.window_size // 2)
+ sim = sim.masked_fill_(attn_mask, float("-inf"))
+
+ probs = nn.functional.softmax(sim, dim=-1)
+ output = torch.einsum('hbwij,hbwjc->hbwic', probs, v)
+ output = rearrange(output, 'h b w p c -> b w p (h c)')
+ output = self.linear(output)
+ output = rearrange(output, 'b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c', w1=h_windows, p1=self.window_size)
+
+ if self.type != 'W': output = torch.roll(output, shifts=(self.window_size // 2, self.window_size // 2),
+ dims=(1, 2))
+ return output
+
+ def relative_embedding(self):
+ cord = torch.tensor(np.array([[i, j] for i in range(self.window_size) for j in range(self.window_size)]))
+ relation = cord[:, None, :] - cord[None, :, :] + self.window_size - 1
+ # negative is allowed
+ return self.relative_position_params[:, relation[:, :, 0].long(), relation[:, :, 1].long()]
+
+
+class Block(nn.Module):
+ def __init__(self, input_dim, output_dim, head_dim, window_size, drop_path, type='W', input_resolution=None):
+ """ SwinTransformer Block
+ """
+ super(Block, self).__init__()
+ self.input_dim = input_dim
+ self.output_dim = output_dim
+ assert type in ['W', 'SW']
+ self.type = type
+ if input_resolution <= window_size:
+ self.type = 'W'
+
+ self.ln1 = nn.LayerNorm(input_dim)
+ self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type)
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+ self.ln2 = nn.LayerNorm(input_dim)
+ self.mlp = nn.Sequential(
+ nn.Linear(input_dim, 4 * input_dim),
+ nn.GELU(),
+ nn.Linear(4 * input_dim, output_dim),
+ )
+
+ def forward(self, x):
+ x = x + self.drop_path(self.msa(self.ln1(x)))
+ x = x + self.drop_path(self.mlp(self.ln2(x)))
+ return x
+
+
+class ConvTransBlock(nn.Module):
+ def __init__(self, conv_dim, trans_dim, head_dim, window_size, drop_path, type='W', input_resolution=None):
+ """ SwinTransformer and Conv Block
+ """
+ super(ConvTransBlock, self).__init__()
+ self.conv_dim = conv_dim
+ self.trans_dim = trans_dim
+ self.head_dim = head_dim
+ self.window_size = window_size
+ self.drop_path = drop_path
+ self.type = type
+ self.input_resolution = input_resolution
+
+ assert self.type in ['W', 'SW']
+ if self.input_resolution <= self.window_size:
+ self.type = 'W'
+
+ self.trans_block = Block(self.trans_dim, self.trans_dim, self.head_dim, self.window_size, self.drop_path,
+ self.type, self.input_resolution)
+ self.conv1_1 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True)
+ self.conv1_2 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True)
+
+ self.conv_block = nn.Sequential(
+ nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False),
+ nn.ReLU(True),
+ nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False)
+ )
+
+ def forward(self, x):
+ conv_x, trans_x = torch.split(self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1)
+ conv_x = self.conv_block(conv_x) + conv_x
+ trans_x = Rearrange('b c h w -> b h w c')(trans_x)
+ trans_x = self.trans_block(trans_x)
+ trans_x = Rearrange('b h w c -> b c h w')(trans_x)
+ res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1))
+ x = x + res
+
+ return x
+
+
+class SCUNet(nn.Module):
+ # def __init__(self, in_nc=3, config=[2, 2, 2, 2, 2, 2, 2], dim=64, drop_path_rate=0.0, input_resolution=256):
+ def __init__(self, in_nc=3, config=None, dim=64, drop_path_rate=0.0, input_resolution=256):
+ super(SCUNet, self).__init__()
+ if config is None:
+ config = [2, 2, 2, 2, 2, 2, 2]
+ self.config = config
+ self.dim = dim
+ self.head_dim = 32
+ self.window_size = 8
+
+ # drop path rate for each layer
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))]
+
+ self.m_head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)]
+
+ begin = 0
+ self.m_down1 = [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution)
+ for i in range(config[0])] + \
+ [nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False)]
+
+ begin += config[0]
+ self.m_down2 = [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution // 2)
+ for i in range(config[1])] + \
+ [nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False)]
+
+ begin += config[1]
+ self.m_down3 = [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution // 4)
+ for i in range(config[2])] + \
+ [nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False)]
+
+ begin += config[2]
+ self.m_body = [ConvTransBlock(4 * dim, 4 * dim, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution // 8)
+ for i in range(config[3])]
+
+ begin += config[3]
+ self.m_up3 = [nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False), ] + \
+ [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution // 4)
+ for i in range(config[4])]
+
+ begin += config[4]
+ self.m_up2 = [nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False), ] + \
+ [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution // 2)
+ for i in range(config[5])]
+
+ begin += config[5]
+ self.m_up1 = [nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False), ] + \
+ [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin],
+ 'W' if not i % 2 else 'SW', input_resolution)
+ for i in range(config[6])]
+
+ self.m_tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)]
+
+ self.m_head = nn.Sequential(*self.m_head)
+ self.m_down1 = nn.Sequential(*self.m_down1)
+ self.m_down2 = nn.Sequential(*self.m_down2)
+ self.m_down3 = nn.Sequential(*self.m_down3)
+ self.m_body = nn.Sequential(*self.m_body)
+ self.m_up3 = nn.Sequential(*self.m_up3)
+ self.m_up2 = nn.Sequential(*self.m_up2)
+ self.m_up1 = nn.Sequential(*self.m_up1)
+ self.m_tail = nn.Sequential(*self.m_tail)
+ # self.apply(self._init_weights)
+
+ def forward(self, x0):
+
+ h, w = x0.size()[-2:]
+ paddingBottom = int(np.ceil(h / 64) * 64 - h)
+ paddingRight = int(np.ceil(w / 64) * 64 - w)
+ x0 = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x0)
+
+ x1 = self.m_head(x0)
+ x2 = self.m_down1(x1)
+ x3 = self.m_down2(x2)
+ x4 = self.m_down3(x3)
+ x = self.m_body(x4)
+ x = self.m_up3(x + x4)
+ x = self.m_up2(x + x3)
+ x = self.m_up1(x + x2)
+ x = self.m_tail(x + x1)
+
+ x = x[..., :h, :w]
+
+ return x
+
+ def _init_weights(self, m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=.02)
+ if m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
\ No newline at end of file
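
Note: scunet_model_arch.py is the backbone that modules/scunet_model.py instantiates. SCUNet.forward() replication-pads the input so height and width become multiples of 64 (so every window and downsampling stage divides evenly), runs the conv/Swin hybrid U-Net, and crops back to the original size. A quick smoke test on random data, assuming the module is importable as modules.scunet_model_arch:

import torch
from modules.scunet_model_arch import SCUNet

# The checkpoints referenced in scunet_model.py are loaded with config=[4]*7 and dim=64.
model = SCUNet(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64)
model.eval()

x = torch.rand(1, 3, 100, 150)   # arbitrary size; forward() pads it up to 128x192 internally
with torch.no_grad():
    y = model(x)
print(y.shape)                   # torch.Size([1, 3, 100, 150]) -- cropped back to the input size
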
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index fd57e5c5..3fa06242 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -130,7 +130,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
while i < len(tokens):
token = tokens[i]
- embedding = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
+ embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
if embedding is None:
remade_tokens.append(token)
@@ -142,7 +142,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
remade_tokens += [0] * emb_len
multipliers += [weight] * emb_len
used_custom_terms.append((embedding.name, embedding.checksum()))
- i += emb_len
+ i += embedding_length_in_tokens
if len(remade_tokens) > maxlen - 2:
vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
@@ -213,7 +213,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
while i < len(tokens):
token = tokens[i]
- embedding = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
+ embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
mult_change = self.token_mults.get(token) if opts.enable_emphasis else None
if mult_change is not None:
@@ -229,7 +229,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
remade_tokens += [0] * emb_len
multipliers += [mult] * emb_len
used_custom_terms.append((embedding.name, embedding.checksum()))
- i += emb_len
+ i += embedding_length_in_tokens
if len(remade_tokens) > maxlen - 2:
vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
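
Note: the sd_hijack.py change follows from find_embedding_at_position() now returning both the embedding and the number of prompt tokens it matched, so the loop advances by the matched length rather than by the embedding's vector count (the two can differ for multi-token trigger words). A simplified sketch of that scanning loop, with a toy dictionary standing in for the embedding database:

def find_embedding_at_position(embeddings, tokens, offset):
    # embeddings maps a tuple of token ids to an embedding record; longest match wins.
    for ids, embedding in sorted(embeddings.items(), key=lambda kv: len(kv[0]), reverse=True):
        if tuple(tokens[offset:offset + len(ids)]) == ids:
            return embedding, len(ids)
    return None, None

def remake(tokens, embeddings):
    remade, i = [], 0
    while i < len(tokens):
        embedding, embedding_length_in_tokens = find_embedding_at_position(embeddings, tokens, i)
        if embedding is None:
            remade.append(tokens[i])
            i += 1
        else:
            emb_len = embedding["vectors"]       # how many vectors the embedding expands to
            remade += [0] * emb_len              # placeholder ids, filled in by the hijack later
            i += embedding_length_in_tokens      # skip only the prompt tokens that actually matched
    return remade

emb_db = {(101, 102): {"vectors": 3}}            # a two-token trigger expanding into three vectors
print(remake([7, 101, 102, 9], emb_db))          # [7, 0, 0, 0, 9]
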
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 92522214..9316875a 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -77,7 +77,9 @@ def extended_tdqm(sequence, *args, desc=None, **kwargs):
state.sampling_steps = len(sequence)
state.sampling_step = 0
- for x in tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs):
+ seq = sequence if cmd_opts.disable_console_progressbars else tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs)
+
+ for x in seq:
if state.interrupted:
break
@@ -207,7 +209,9 @@ def extended_trange(sampler, count, *args, **kwargs):
state.sampling_steps = count
state.sampling_step = 0
- for x in tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs):
+ seq = range(count) if cmd_opts.disable_console_progressbars else tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs)
+
+ for x in seq:
if state.interrupted:
break
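
Note: both sampler loops now skip the tqdm wrapper entirely when --disable-console-progressbars is set, instead of always constructing a progress bar. A minimal sketch of the pattern:

import tqdm

def steps(sequence, disable_progressbar: bool, desc: str = "sampling"):
    # Same loop body either way; only the iterable wrapper changes.
    seq = sequence if disable_progressbar else tqdm.tqdm(sequence, desc=desc)
    for x in seq:
        yield x

# for step in steps(range(20), disable_progressbar=True):
#     ...
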
diff --git a/modules/shared.py b/modules/shared.py
index ac0bc480..1bf7a6c1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -40,6 +40,7 @@ parser.add_argument("--gfpgan-models-path", type=str, help="Path to directory wi
parser.add_argument("--esrgan-models-path", type=str, help="Path to directory with ESRGAN model file(s).", default=os.path.join(model_path, 'ESRGAN'))
parser.add_argument("--bsrgan-models-path", type=str, help="Path to directory with BSRGAN model file(s).", default=os.path.join(model_path, 'BSRGAN'))
parser.add_argument("--realesrgan-models-path", type=str, help="Path to directory with RealESRGAN model file(s).", default=os.path.join(model_path, 'RealESRGAN'))
+parser.add_argument("--scunet-models-path", type=str, help="Path to directory with ScuNET model file(s).", default=os.path.join(model_path, 'ScuNET'))
parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(model_path, 'SwinIR'))
parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(model_path, 'LDSR'))
parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
@@ -57,6 +58,9 @@ parser.add_argument("--opt-channelslast", action='store_true', help="change memo
parser.add_argument("--styles-file", type=str, help="filename to use for styles", default=os.path.join(script_path, 'styles.csv'))
parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False)
parser.add_argument("--use-textbox-seed", action='store_true', help="use textbox for seeds in UI (no up/down, but possible to input long seeds)", default=False)
+parser.add_argument("--disable-console-progressbars", action='store_true', help="do not output progressbars to console", default=False)
+parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False)
+
cmd_opts = parser.parse_args()
device = get_optimal_device()
@@ -319,14 +323,14 @@ class TotalTQDM:
)
def update(self):
- if not opts.multiple_tqdm:
+ if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
return
if self._tqdm is None:
self.reset()
self._tqdm.update()
def updateTotal(self, new_total):
- if not opts.multiple_tqdm:
+ if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
return
if self._tqdm is None:
self.reset()
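
Note: shared.py adds the two opt-in console flags and then consults disable_console_progressbars before touching the aggregate progress bar. A self-contained sketch of that gate; this TotalTQDM is a stub rather than the real class:

import argparse
import tqdm

parser = argparse.ArgumentParser()
parser.add_argument("--disable-console-progressbars", action='store_true',
                    help="do not output progressbars to console", default=False)
parser.add_argument("--enable-console-prompts", action='store_true',
                    help="print prompts to console when generating with txt2img and img2img", default=False)
cmd_opts = parser.parse_args([])      # empty list -> just the defaults, for the sketch

class TotalTQDM:
    def __init__(self):
        self._tqdm = None

    def update(self):
        if cmd_opts.disable_console_progressbars:
            return                    # leave the console untouched when the flag is set
        if self._tqdm is None:
            self._tqdm = tqdm.tqdm(total=100, desc="Total progress")
        self._tqdm.update()
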
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index c0baaace..1183aab7 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -7,7 +7,7 @@ import tqdm
import html
import datetime
-from modules import shared, devices, sd_hijack, processing
+from modules import shared, devices, sd_hijack, processing, sd_models
import modules.textual_inversion.dataset
@@ -17,6 +17,8 @@ class Embedding:
self.name = name
self.step = step
self.cached_checksum = None
+ self.sd_checkpoint = None
+ self.sd_checkpoint_name = None
def save(self, filename):
embedding_data = {
@@ -24,6 +26,8 @@ class Embedding:
"string_to_param": {"*": self.vec},
"name": self.name,
"step": self.step,
+ "sd_checkpoint": self.sd_checkpoint,
+ "sd_checkpoint_name": self.sd_checkpoint_name,
}
torch.save(embedding_data, filename)
@@ -41,6 +45,7 @@ class Embedding:
self.cached_checksum = f'{const_hash(self.vec.reshape(-1) * 100) & 0xffff:04x}'
return self.cached_checksum
+
class EmbeddingDatabase:
def __init__(self, embeddings_dir):
self.ids_lookup = {}
@@ -57,7 +62,8 @@ class EmbeddingDatabase:
first_id = ids[0]
if first_id not in self.ids_lookup:
self.ids_lookup[first_id] = []
- self.ids_lookup[first_id].append((ids, embedding))
+
+ self.ids_lookup[first_id] = sorted(self.ids_lookup[first_id] + [(ids, embedding)], key=lambda x: len(x[0]), reverse=True)
return embedding
@@ -95,6 +101,8 @@ class EmbeddingDatabase:
vec = emb.detach().to(devices.device, dtype=torch.float32)
embedding = Embedding(vec, name)
embedding.step = data.get('step', None)
+ embedding.sd_checkpoint = data.get('hash', None)
+ embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None)
self.register_embedding(embedding, shared.sd_model)
for fn in os.listdir(self.embeddings_dir):
@@ -117,24 +125,21 @@ class EmbeddingDatabase:
possible_matches = self.ids_lookup.get(token, None)
if possible_matches is None:
- return None
+ return None, None
for ids, embedding in possible_matches:
if tokens[offset:offset + len(ids)] == ids:
- return embedding
-
- return None
+ return embedding, len(ids)
+ return None, None
-def create_embedding(name, num_vectors_per_token):
- init_text = '*'
-
+def create_embedding(name, num_vectors_per_token, init_text='*'):
cond_model = shared.sd_model.cond_stage_model
embedding_layer = cond_model.wrapped.transformer.text_model.embeddings
ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"]
- embedded = embedding_layer(ids.to(devices.device)).squeeze(0)
+ embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device)
for i in range(num_vectors_per_token):
@@ -251,6 +256,10 @@ Last saved image: {html.escape(last_saved_image)}<br/>
</p>
"""
+ checkpoint = sd_models.select_checkpoint()
+
+ embedding.sd_checkpoint = checkpoint.hash
+ embedding.sd_checkpoint_name = checkpoint.model_name
embedding.cached_checksum = None
embedding.save(filename)
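
Note: create_embedding() now accepts an initialization text and seeds the new vectors from the wrapped token-embedding layer's output for that text, and the finished embedding records which checkpoint it was trained against. A rough sketch of the initialization step, using a plain Hugging Face CLIP text model in place of webui's hijacked cond_stage_model; the cyclic reuse of init vectors is an assumption, since the loop body is not shown in the hunk above:

import torch
from transformers import CLIPTokenizer, CLIPTextModel

def init_embedding_vectors(num_vectors_per_token: int, init_text: str = '*',
                           model_name: str = "openai/clip-vit-large-patch14") -> torch.Tensor:
    tokenizer = CLIPTokenizer.from_pretrained(model_name)
    text_model = CLIPTextModel.from_pretrained(model_name)
    embedding_layer = text_model.text_model.embeddings.token_embedding

    ids = tokenizer(init_text, max_length=num_vectors_per_token,
                    return_tensors="pt", add_special_tokens=False)["input_ids"]
    with torch.no_grad():
        embedded = embedding_layer(ids).squeeze(0)          # (n_init_tokens, hidden_dim)

    vec = torch.zeros((num_vectors_per_token, embedded.shape[1]))
    for i in range(num_vectors_per_token):
        vec[i] = embedded[i % embedded.shape[0]]            # reuse init vectors cyclically (assumed rule)
    return vec

# vec = init_embedding_vectors(4, init_text="a photo of")
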
diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py
index ce3677a9..66c43ffb 100644
--- a/modules/textual_inversion/ui.py
+++ b/modules/textual_inversion/ui.py
@@ -6,8 +6,8 @@ import modules.textual_inversion.textual_inversion as ti
from modules import sd_hijack, shared
-def create_embedding(name, nvpt):
- filename = ti.create_embedding(name, nvpt)
+def create_embedding(name, initialization_text, nvpt):
+ filename = ti.create_embedding(name, nvpt, init_text=initialization_text)
sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings()
diff --git a/modules/txt2img.py b/modules/txt2img.py
index 5368e4d0..d4406c3c 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -34,7 +34,9 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
denoising_strength=denoising_strength if enable_hr else None,
)
- print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)
+ if cmd_opts.enable_console_prompts:
+ print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)
+
processed = modules.scripts.scripts_txt2img.run(p, *args)
if processed is None:
diff --git a/modules/ui.py b/modules/ui.py
index b9ddaedc..c8f5bb84 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -964,6 +964,7 @@ def create_ui(wrap_gradio_gpu_call):
gr.HTML(value="<p style='margin-bottom: 0.7em'>Create a new embedding</p>")
new_embedding_name = gr.Textbox(label="Name")
+ initialization_text = gr.Textbox(label="Initialization text", value="*")
nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1)
with gr.Row():
@@ -1007,6 +1008,7 @@ def create_ui(wrap_gradio_gpu_call):
fn=modules.textual_inversion.ui.create_embedding,
inputs=[
new_embedding_name,
+ initialization_text,
nvpt,
],
outputs=[
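
Note: on the UI side, the new Initialization text box only takes effect because it is threaded through the button's inputs list in the position the target function expects (between the name and the vector count). A minimal Gradio sketch of that wiring, with a stub in place of modules.textual_inversion.ui.create_embedding:

import gradio as gr

def create_embedding(name, initialization_text, nvpt):
    # Stub: the real function creates the embedding file and reloads the database.
    return f"Would create '{name}' from '{initialization_text}' with {int(nvpt)} vectors"

with gr.Blocks() as demo:
    new_embedding_name = gr.Textbox(label="Name")
    initialization_text = gr.Textbox(label="Initialization text", value="*")
    nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1)
    result = gr.Textbox(label="Result")
    create = gr.Button("Create embedding")
    create.click(fn=create_embedding,
                 inputs=[new_embedding_name, initialization_text, nvpt],
                 outputs=[result])

# demo.launch()
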
diff --git a/requirements.txt b/requirements.txt
index 7cb9d329..d4b337fc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,14 +13,12 @@ Pillow
pytorch_lightning
realesrgan
scikit-image>=0.19
-git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379
timm==0.4.12
transformers==4.19.2
torch
einops
jsonmerge
clean-fid
-git+https://github.com/openai/CLIP@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1
resize-right
torchdiffeq
kornia
diff --git a/requirements_versions.txt b/requirements_versions.txt
index 1e8006e0..8a9acf20 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -18,7 +18,6 @@ piexif==1.1.3
einops==0.4.1
jsonmerge==1.8.0
clean-fid==0.1.29
-git+https://github.com/openai/CLIP@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1
resize-right==0.0.2
torchdiffeq==0.2.3
kornia==0.6.7
diff --git a/webui.py b/webui.py
index 19fdcdd4..424ab975 100644
--- a/webui.py
+++ b/webui.py
@@ -1,26 +1,23 @@
import os
-import threading
-
-from modules import devices
-from modules.paths import script_path
import signal
import threading
-import modules.paths
+
import modules.codeformer_model as codeformer
-import modules.esrgan_model as esrgan
-import modules.bsrgan_model as bsrgan
import modules.extras
import modules.face_restoration
import modules.gfpgan_model as gfpgan
-import modules.ldsr_model as ldsr
+import modules.img2img
+
import modules.lowvram
-import modules.realesrgan_model as realesrgan
+import modules.paths
import modules.scripts
import modules.sd_hijack
import modules.sd_models
import modules.shared as shared
-import modules.swinir_model as swinir
+import modules.txt2img
+
import modules.ui
+from modules import devices
from modules import modelloader
from modules.paths import script_path
from modules.shared import cmd_opts