From 2ce52d32e41fb523d1494f45073fd18496e52d35 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 16:31:12 +0000 Subject: fix for #3086 failing to load any previous hypernet --- modules/hypernetworks/hypernetwork.py | 60 ++++++++++++++++------------------- 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d519cd9..74300122 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -24,11 +24,10 @@ class HypernetworkModule(torch.nn.Module): def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): super().__init__() - if layer_structure is not None: - assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" - assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - else: - layer_structure = parse_layer_structure(dim, state_dict) + + assert layer_structure is not None, "layer_structure mut not be None" + assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" linears = [] for i in range(len(layer_structure) - 1): @@ -39,23 +38,30 @@ class HypernetworkModule(torch.nn.Module): self.linear = torch.nn.Sequential(*linears) if state_dict is not None: - try: - self.load_state_dict(state_dict) - except RuntimeError: - self.try_load_previous(state_dict) + self.fix_old_state_dict(state_dict) + self.load_state_dict(state_dict) else: for layer in self.linear: - layer.weight.data.normal_(mean = 0.0, std = 0.01) + layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() self.to(devices.device) - def try_load_previous(self, state_dict): - states = self.state_dict() - states['linear.0.bias'].copy_(state_dict['linear1.bias']) - states['linear.0.weight'].copy_(state_dict['linear1.weight']) - states['linear.1.bias'].copy_(state_dict['linear2.bias']) - states['linear.1.weight'].copy_(state_dict['linear2.weight']) + def fix_old_state_dict(self, state_dict): + changes = { + 'linear1.bias': 'linear.0.bias', + 'linear1.weight': 'linear.0.weight', + 'linear2.bias': 'linear.1.bias', + 'linear2.weight': 'linear.1.weight', + } + + for fr, to in changes.items(): + x = state_dict.get(fr, None) + if x is None: + continue + + del state_dict[fr] + state_dict[to] = x def forward(self, x): return x + self.linear(x) * self.multiplier @@ -71,18 +77,6 @@ def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength -def parse_layer_structure(dim, state_dict): - i = 0 - layer_structure = [1] - - while (key := "linear.{}.weight".format(i)) in state_dict: - weight = state_dict[key] - layer_structure.append(len(weight) // dim) - i += 1 - - return layer_structure - - class Hypernetwork: filename = None name = None @@ -135,17 +129,18 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') + self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) + self.add_layer_norm = state_dict.get('is_layer_norm', False) + for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], state_dict["layer_structure"], state_dict["is_layer_norm"]), - HypernetworkModule(size, sd[1], state_dict["layer_structure"], state_dict["is_layer_norm"]), + HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm), ) self.name = state_dict.get('name', self.name) self.step = state_dict.get('step', 0) - self.layer_structure = state_dict.get('layer_structure', None) - self.add_layer_norm = state_dict.get('is_layer_norm', False) self.sd_checkpoint = state_dict.get('sd_checkpoint', None) self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) @@ -244,6 +239,7 @@ def stack_conds(conds): return torch.stack(conds) + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert hypernetwork_name, 'hypernetwork not selected' -- cgit v1.2.3 From d6ea5841374a28f3f6deb73abc251c8f0bcb240f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:07:57 +0100 Subject: change html output --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d519cd9..73c1cb80 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -380,7 +380,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log Loss: {mean_loss:.7f}
Step: {hypernetwork.step}
Last prompt: {html.escape(entries[0].cond_text)}
-Last saved embedding: {html.escape(last_saved_file)}
+Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" -- cgit v1.2.3 From 6f98e89486f55b0e4657e96ce640cf1c4675d187 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Thu, 20 Oct 2022 00:10:45 +0000 Subject: update --- modules/hypernetworks/hypernetwork.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 74300122..7d617680 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -22,16 +22,20 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): super().__init__() - assert layer_structure is not None, "layer_structure mut not be None" + assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if activation_func == "relu": + linears.append(torch.nn.ReLU()) + if activation_func == "leakyrelu": + linears.append(torch.nn.LeakyReLU()) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -42,8 +46,9 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - layer.weight.data.normal_(mean=0.0, std=0.01) - layer.bias.data.zero_() + if not "ReLU" in layer.__str__(): + layer.weight.data.normal_(mean=0.0, std=0.01) + layer.bias.data.zero_() self.to(devices.device) @@ -69,7 +74,8 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - layer_structure += [layer.weight, layer.bias] + if not "ReLU" in layer.__str__(): + layer_structure += [layer.weight, layer.bias] return layer_structure @@ -81,7 +87,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False, activation_func=None): self.filename = None self.name = name self.layers = {} @@ -90,11 +96,12 @@ class Hypernetwork: self.sd_checkpoint_name = None self.layer_structure = layer_structure self.add_layer_norm = add_layer_norm + self.activation_func = activation_func for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), ) def weights(self): @@ -117,6 +124,7 @@ class Hypernetwork: state_dict['name'] = self.name state_dict['layer_structure'] = self.layer_structure state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['activation_func'] = self.activation_func state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -131,12 +139,13 @@ class Hypernetwork: self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) self.add_layer_norm = state_dict.get('is_layer_norm', False) + self.activation_func = state_dict.get('activation_func', None) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm), - HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm, self.activation_func), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.3 From d8acd34f66ab35a91f10d66330bcc95a83bfcac6 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Thu, 20 Oct 2022 23:43:03 +0900 Subject: generalized some functions and option for ignoring first layer --- modules/hypernetworks/hypernetwork.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d617680..3a44b377 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -21,21 +21,27 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - + activation_dict = {"relu": torch.nn.ReLU, "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish} + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): super().__init__() assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - + linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - if activation_func == "relu": - linears.append(torch.nn.ReLU()) - if activation_func == "leakyrelu": - linears.append(torch.nn.LeakyReLU()) + # if skip_first_layer because first parameters potentially contain negative values + if i < 1: continue + if activation_func in HypernetworkModule.activation_dict: + linears.append(HypernetworkModule.activation_dict[activation_func]()) + else: + print("Invalid key {} encountered as activation function!".format(activation_func)) + # if use_dropout: + linears.append(torch.nn.Dropout(p=0.3)) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -46,7 +52,7 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - if not "ReLU" in layer.__str__(): + if isinstance(layer, torch.nn.Linear): layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() @@ -298,7 +304,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + # if optimizer == "Adam": or else Adam / AdamW / etc... + optimizer = torch.optim.Adam(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: -- cgit v1.2.3 From a71e0212363979c7cbbb797c9fbd5f8cd03b29d3 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Thu, 20 Oct 2022 23:48:52 +0900 Subject: only linear --- modules/hypernetworks/hypernetwork.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3a44b377..905cbeef 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -35,13 +35,13 @@ class HypernetworkModule(torch.nn.Module): for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # if skip_first_layer because first parameters potentially contain negative values - if i < 1: continue + # if i < 1: continue if activation_func in HypernetworkModule.activation_dict: linears.append(HypernetworkModule.activation_dict[activation_func]()) else: print("Invalid key {} encountered as activation function!".format(activation_func)) # if use_dropout: - linears.append(torch.nn.Dropout(p=0.3)) + # linears.append(torch.nn.Dropout(p=0.3)) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -80,7 +80,7 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - if not "ReLU" in layer.__str__(): + if isinstance(layer, torch.nn.Linear): layer_structure += [layer.weight, layer.bias] return layer_structure @@ -304,8 +304,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - # if optimizer == "Adam": or else Adam / AdamW / etc... - optimizer = torch.optim.Adam(weights, lr=scheduler.learn_rate) + # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... + optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: -- cgit v1.2.3 From 108be15500aac590b4e00420635d7b61fccfa530 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Fri, 21 Oct 2022 01:00:41 +0900 Subject: fix bugs and optimizations --- modules/hypernetworks/hypernetwork.py | 105 +++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 46 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 905cbeef..893ba110 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -36,14 +36,14 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # if skip_first_layer because first parameters potentially contain negative values # if i < 1: continue + if add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) if activation_func in HypernetworkModule.activation_dict: linears.append(HypernetworkModule.activation_dict[activation_func]()) else: print("Invalid key {} encountered as activation function!".format(activation_func)) # if use_dropout: # linears.append(torch.nn.Dropout(p=0.3)) - if add_layer_norm: - linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) self.linear = torch.nn.Sequential(*linears) @@ -115,11 +115,24 @@ class Hypernetwork: for k, layers in self.layers.items(): for layer in layers: - layer.train() res += layer.trainables() return res + def eval(self): + for k, layers in self.layers.items(): + for layer in layers: + layer.eval() + for items in self.weights(): + items.requires_grad = False + + def train(self): + for k, layers in self.layers.items(): + for layer in layers: + layer.train() + for items in self.weights(): + items.requires_grad = True + def save(self, filename): state_dict = {} @@ -290,10 +303,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.sd_model.first_stage_model.to(devices.cpu) hypernetwork = shared.loaded_hypernetwork - weights = hypernetwork.weights() - for weight in weights: - weight.requires_grad = True - losses = torch.zeros((32,)) last_saved_file = "" @@ -304,10 +313,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... - optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + optimizer = torch.optim.AdamW(hypernetwork.weights(), lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) + hypernetwork.train() for i, entries in pbar: hypernetwork.step = i + ititial_step @@ -328,8 +337,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() - optimizer.zero_grad() + optimizer.zero_grad(set_to_none=True) loss.backward() + del loss optimizer.step() mean_loss = losses.mean() if torch.isnan(mean_loss): @@ -346,44 +356,47 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: + torch.cuda.empty_cache() last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') + with torch.no_grad(): + hypernetwork.eval() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) - optimizer.zero_grad() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) - - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_index = preview_sampler_index - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 - - preview_text = p.prompt - - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None - - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) - - if image is not None: - shared.state.current_image = image - image.save(last_saved_image) - last_saved_image += f", prompt: {preview_text}" + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entries[0].cond_text + p.steps = 20 + + preview_text = p.prompt + + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + if image is not None: + shared.state.current_image = image + image.save(last_saved_image) + last_saved_image += f", prompt: {preview_text}" + + hypernetwork.train() shared.state.job_no = hypernetwork.step -- cgit v1.2.3 From f89829ec3a0baceb445451ad98d4fb4323e922aa Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 21 Oct 2022 01:37:11 +0900 Subject: Revert "fix bugs and optimizations" This reverts commit 108be15500aac590b4e00420635d7b61fccfa530. --- modules/hypernetworks/hypernetwork.py | 105 +++++++++++++++------------------- 1 file changed, 46 insertions(+), 59 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 893ba110..905cbeef 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -36,14 +36,14 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # if skip_first_layer because first parameters potentially contain negative values # if i < 1: continue - if add_layer_norm: - linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) if activation_func in HypernetworkModule.activation_dict: linears.append(HypernetworkModule.activation_dict[activation_func]()) else: print("Invalid key {} encountered as activation function!".format(activation_func)) # if use_dropout: # linears.append(torch.nn.Dropout(p=0.3)) + if add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) self.linear = torch.nn.Sequential(*linears) @@ -115,24 +115,11 @@ class Hypernetwork: for k, layers in self.layers.items(): for layer in layers: + layer.train() res += layer.trainables() return res - def eval(self): - for k, layers in self.layers.items(): - for layer in layers: - layer.eval() - for items in self.weights(): - items.requires_grad = False - - def train(self): - for k, layers in self.layers.items(): - for layer in layers: - layer.train() - for items in self.weights(): - items.requires_grad = True - def save(self, filename): state_dict = {} @@ -303,6 +290,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.sd_model.first_stage_model.to(devices.cpu) hypernetwork = shared.loaded_hypernetwork + weights = hypernetwork.weights() + for weight in weights: + weight.requires_grad = True + losses = torch.zeros((32,)) last_saved_file = "" @@ -313,10 +304,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - optimizer = torch.optim.AdamW(hypernetwork.weights(), lr=scheduler.learn_rate) + # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... + optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - hypernetwork.train() for i, entries in pbar: hypernetwork.step = i + ititial_step @@ -337,9 +328,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() - optimizer.zero_grad(set_to_none=True) + optimizer.zero_grad() loss.backward() - del loss optimizer.step() mean_loss = losses.mean() if torch.isnan(mean_loss): @@ -356,47 +346,44 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: - torch.cuda.empty_cache() last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') - with torch.no_grad(): - hypernetwork.eval() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_index = preview_sampler_index - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 - - preview_text = p.prompt - - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None - - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) - - if image is not None: - shared.state.current_image = image - image.save(last_saved_image) - last_saved_image += f", prompt: {preview_text}" - - hypernetwork.train() + optimizer.zero_grad() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entries[0].cond_text + p.steps = 20 + + preview_text = p.prompt + + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + if image is not None: + shared.state.current_image = image + image.save(last_saved_image) + last_saved_image += f", prompt: {preview_text}" shared.state.job_no = hypernetwork.step -- cgit v1.2.3 From c23f666dba2b484d521d2dc4be91cf9e09312647 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 09:47:43 +0300 Subject: a more strict check for activation type and a more reasonable check for type of layer in hypernets --- modules/hypernetworks/hypernetwork.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d617680..84e7e350 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -32,10 +32,16 @@ class HypernetworkModule(torch.nn.Module): linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if activation_func == "relu": linears.append(torch.nn.ReLU()) - if activation_func == "leakyrelu": + elif activation_func == "leakyrelu": linears.append(torch.nn.LeakyReLU()) + elif activation_func == 'linear' or activation_func is None: + pass + else: + raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') + if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -46,7 +52,7 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - if not "ReLU" in layer.__str__(): + if type(layer) == torch.nn.Linear: layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() @@ -74,7 +80,7 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - if not "ReLU" in layer.__str__(): + if type(layer) == torch.nn.Linear: layer_structure += [layer.weight, layer.bias] return layer_structure -- cgit v1.2.3 From 5245c7a4935f67b677da0f5a1fc2b74c074aa0e2 Mon Sep 17 00:00:00 2001 From: timntorres Date: Wed, 19 Oct 2022 12:21:32 -0700 Subject: Issue #2921-Give PNG info to Hypernet previews. --- modules/hypernetworks/hypernetwork.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 84e7e350..68c8f26d 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -256,6 +256,9 @@ def stack_conds(conds): def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): + # images is required here to give training previews their infotext. Importing this at the very top causes a circular dependency. + from modules import images + assert hypernetwork_name, 'hypernetwork not selected' path = shared.hypernetworks.get(hypernetwork_name, None) @@ -298,6 +301,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log last_saved_file = "" last_saved_image = "" + forced_filename = "" ititial_step = hypernetwork.step or 0 if ititial_step > steps: @@ -345,7 +349,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: - last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') + forced_filename = f'{hypernetwork_name}-{hypernetwork.step}' + last_saved_image = os.path.join(images_dir, forced_filename) optimizer.zero_grad() shared.sd_model.cond_stage_model.to(devices.device) @@ -381,7 +386,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if image is not None: shared.state.current_image = image - image.save(last_saved_image) + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename) last_saved_image += f", prompt: {preview_text}" shared.state.job_no = hypernetwork.step -- cgit v1.2.3 From 4ff274e1e35bb642687253ce744d2cfa738ab293 Mon Sep 17 00:00:00 2001 From: timntorres Date: Wed, 19 Oct 2022 12:32:22 -0700 Subject: Revise comments. --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 68c8f26d..3f96361c 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -256,7 +256,7 @@ def stack_conds(conds): def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): - # images is required here to give training previews their infotext. Importing this at the very top causes a circular dependency. + # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images assert hypernetwork_name, 'hypernetwork not selected' -- cgit v1.2.3 From 03a1e288c4973dd2dff57a97469b40f146b6fccf Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 10:13:24 +0300 Subject: turns out LayerNorm also has weight and bias and needs to be pre-multiplied and trained for hypernets --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3274a802..b1a5d0c7 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -52,7 +52,7 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - if type(layer) == torch.nn.Linear: + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() @@ -80,7 +80,7 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - if type(layer) == torch.nn.Linear: + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: layer_structure += [layer.weight, layer.bias] return layer_structure -- cgit v1.2.3 From 19818f023cfafc472c6c241cab0b72896a168481 Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 21 Oct 2022 02:14:02 -0700 Subject: Match hypernet name with filename in all cases. --- modules/hypernetworks/hypernetwork.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b1a5d0c7..6d392be4 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -340,7 +340,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar.set_description(f"loss: {mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: - last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt') + temp = hypernetwork.name + # Before saving, change name to match current checkpoint. + hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') hypernetwork.save(last_saved_file) textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { @@ -405,6 +408,9 @@ Last saved image: {html.escape(last_saved_image)}
hypernetwork.sd_checkpoint = checkpoint.hash hypernetwork.sd_checkpoint_name = checkpoint.model_name + # Before saving for the last time, change name back to the base name (as opposed to the save_hypernetwork_every step-suffixed naming convention). + hypernetwork.name = hypernetwork_name + filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork.name}.pt') hypernetwork.save(filename) return hypernetwork, filename -- cgit v1.2.3 From 272fa527bbe93143668ffc16838107b7dca35b40 Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 21 Oct 2022 02:41:55 -0700 Subject: Remove unused variable. --- modules/hypernetworks/hypernetwork.py | 1 - 1 file changed, 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 6d392be4..47d91ea5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -340,7 +340,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar.set_description(f"loss: {mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: - temp = hypernetwork.name # Before saving, change name to match current checkpoint. hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}' last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') -- cgit v1.2.3 From 0e8ca8e7af05be22d7d2c07a47c3c7febe0f0ab6 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 11:07:00 +0000 Subject: add dropout --- modules/hypernetworks/hypernetwork.py | 68 +++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 26 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 905cbeef..e493f366 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -1,47 +1,60 @@ +import csv import datetime import glob import html import os import sys import traceback -import tqdm -import csv +import modules.textual_inversion.dataset import torch - -from ldm.util import default -from modules import devices, shared, processing, sd_models -import torch -from torch import einsum +import tqdm from einops import rearrange, repeat -import modules.textual_inversion.dataset +from ldm.util import default +from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler +from torch import einsum class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - activation_dict = {"relu": torch.nn.ReLU, "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, - "swish": torch.nn.Hardswish} - - def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): + activation_dict = { + "relu": torch.nn.ReLU, + "leakyrelu": torch.nn.LeakyReLU, + "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish, + } + + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - + assert activation_func not in self.activation_dict.keys() + "linear", f"Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" + linears = [] for i in range(len(layer_structure) - 1): + + # Add a fully-connected layer linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - # if skip_first_layer because first parameters potentially contain negative values - # if i < 1: continue - if activation_func in HypernetworkModule.activation_dict: - linears.append(HypernetworkModule.activation_dict[activation_func]()) + + # Add an activation func + if activation_func == "linear": + pass + elif activation_func in self.activation_dict: + linears.append(self.activation_dict[activation_func]()) else: - print("Invalid key {} encountered as activation function!".format(activation_func)) - # if use_dropout: - # linears.append(torch.nn.Dropout(p=0.3)) + raise NotImplementedError( + "Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" + ) + + # Add dropout + if use_dropout: + linears.append(torch.nn.Dropout(p=0.3)) + + # Add layer normalization if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -93,7 +106,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False, activation_func=None): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): self.filename = None self.name = name self.layers = {} @@ -101,13 +114,14 @@ class Hypernetwork: self.sd_checkpoint = None self.sd_checkpoint_name = None self.layer_structure = layer_structure - self.add_layer_norm = add_layer_norm self.activation_func = activation_func + self.add_layer_norm = add_layer_norm + self.use_dropout = use_dropout for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), ) def weights(self): @@ -129,8 +143,9 @@ class Hypernetwork: state_dict['step'] = self.step state_dict['name'] = self.name state_dict['layer_structure'] = self.layer_structure - state_dict['is_layer_norm'] = self.add_layer_norm state_dict['activation_func'] = self.activation_func + state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -144,8 +159,9 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) - self.add_layer_norm = state_dict.get('is_layer_norm', False) self.activation_func = state_dict.get('activation_func', None) + self.add_layer_norm = state_dict.get('is_layer_norm', False) + self.use_dropout = state_dict.get('use_dropout', False) for size, sd in state_dict.items(): if type(size) == int: -- cgit v1.2.3 From 7fd90128eb6d1820045bfe2c2c1269661023a712 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 22 Oct 2022 14:48:43 +0300 Subject: added a guard for hypernet training that will stop early if weights are getting no gradients --- modules/hypernetworks/hypernetwork.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 47d91ea5..46039a49 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -310,6 +310,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + steps_without_grad = 0 + pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step @@ -332,8 +334,17 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() optimizer.zero_grad() + weights[0].grad = None loss.backward() + + if weights[0].grad is None: + steps_without_grad += 1 + else: + steps_without_grad = 0 + assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' + optimizer.step() + mean_loss = losses.mean() if torch.isnan(mean_loss): raise RuntimeError("Loss diverged.") -- cgit v1.2.3 From fccba4729db341a299db3343e3264fecd9459a07 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 12:02:41 +0000 Subject: add an option to avoid dying relu --- modules/hypernetworks/hypernetwork.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b7a04038..3132a56c 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -32,7 +32,6 @@ class HypernetworkModule(torch.nn.Module): assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - assert activation_func not in self.activation_dict.keys() + "linear", f"Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" linears = [] for i in range(len(layer_structure) - 1): @@ -43,12 +42,13 @@ class HypernetworkModule(torch.nn.Module): # Add an activation func if activation_func == "linear" or activation_func is None: pass + # If ReLU, Skip adding it to the first layer to avoid dying ReLU + elif activation_func == "relu" and i < 1: + pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) else: - raise RuntimeError( - "Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" - ) + raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') # Add dropout if use_dropout: @@ -166,8 +166,8 @@ class Hypernetwork: for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm, self.activation_func), - HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.3 From 7912acef725832debef58c4c7bf8ec22fb446c0b Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 13:00:44 +0000 Subject: small fix --- modules/hypernetworks/hypernetwork.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3132a56c..7d12e0ff 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -42,22 +42,20 @@ class HypernetworkModule(torch.nn.Module): # Add an activation func if activation_func == "linear" or activation_func is None: pass - # If ReLU, Skip adding it to the first layer to avoid dying ReLU - elif activation_func == "relu" and i < 1: - pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) else: raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') - # Add dropout - if use_dropout: - linears.append(torch.nn.Dropout(p=0.3)) - # Add layer normalization if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) + # Add dropout + if use_dropout: + p = 0.5 if 0 <= i <= len(layer_structure) - 3 else 0.2 + linears.append(torch.nn.Dropout(p=p)) + self.linear = torch.nn.Sequential(*linears) if state_dict is not None: -- cgit v1.2.3 From 6a4fa73a38935a18779ce1809892730fd1572bee Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 13:44:39 +0000 Subject: small fix --- modules/hypernetworks/hypernetwork.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3372aae2..3bc71ee5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -51,10 +51,9 @@ class HypernetworkModule(torch.nn.Module): if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - # Add dropout - if use_dropout: - p = 0.5 if 0 <= i <= len(layer_structure) - 3 else 0.2 - linears.append(torch.nn.Dropout(p=p)) + # Add dropout expect last layer + if use_dropout and i < len(layer_structure) - 3: + linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) -- cgit v1.2.3 From 24694e5983d0944b901892cb101878e6dec89a20 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 01:57:58 +0900 Subject: Update hypernetwork.py --- modules/hypernetworks/hypernetwork.py | 55 ++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3bc71ee5..81132be4 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -16,6 +16,7 @@ from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum +from statistics import stdev, mean class HypernetworkModule(torch.nn.Module): multiplier = 1.0 @@ -268,6 +269,32 @@ def stack_conds(conds): return torch.stack(conds) +def log_statistics(loss_info:dict, key, value): + if key not in loss_info: + loss_info[key] = [value] + else: + loss_info[key].append(value) + if len(loss_info) > 1024: + loss_info.pop(0) + + +def statistics(data): + total_information = f"loss:{mean(data):.3f}"+u"\u00B1"+f"({stdev(data)/ (len(data)**0.5):.3f})" + recent_data = data[-32:] + recent_information = f"recent 32 loss:{mean(recent_data):.3f}"+u"\u00B1"+f"({stdev(recent_data)/ (len(recent_data)**0.5):.3f})" + return total_information, recent_information + + +def report_statistics(loss_info:dict): + keys = sorted(loss_info.keys(), key=lambda x: sum(loss_info[x]) / len(loss_info[x])) + for key in keys: + info, recent = statistics(loss_info[key]) + print("Loss statistics for file " + key) + print(info) + print(recent) + + + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images @@ -310,7 +337,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log for weight in weights: weight.requires_grad = True - losses = torch.zeros((32,)) + size = len(ds.indexes) + loss_dict = {} + losses = torch.zeros((size,)) + previous_mean_loss = 0 + print("Mean loss of {} elements".format(size)) last_saved_file = "" last_saved_image = "" @@ -329,7 +360,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step - + if loss_dict and i % size == 0: + previous_mean_loss = sum(i[-1] for i in loss_dict.values()) / len(loss_dict) + scheduler.apply(optimizer, hypernetwork.step) if scheduler.finished: break @@ -346,7 +379,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log del c losses[hypernetwork.step % losses.shape[0]] = loss.item() - + for entry in entries: + log_statistics(loss_dict, entry.filename, loss.item()) + optimizer.zero_grad() weights[0].grad = None loss.backward() @@ -359,10 +394,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer.step() - mean_loss = losses.mean() - if torch.isnan(mean_loss): + if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): raise RuntimeError("Loss diverged.") - pbar.set_description(f"loss: {mean_loss:.7f}") + pbar.set_description(f"dataset loss: {previous_mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. @@ -371,7 +405,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log hypernetwork.save(last_saved_file) textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { - "loss": f"{mean_loss:.7f}", + "loss": f"{previous_mean_loss:.7f}", "learn_rate": scheduler.learn_rate }) @@ -420,14 +454,15 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.state.textinfo = f"""

-Loss: {mean_loss:.7f}
+Loss: {previous_mean_loss:.7f}
Step: {hypernetwork.step}
Last prompt: {html.escape(entries[0].cond_text)}
Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" - + + report_statistics(loss_dict) checkpoint = sd_models.select_checkpoint() hypernetwork.sd_checkpoint = checkpoint.hash @@ -438,5 +473,3 @@ Last saved image: {html.escape(last_saved_image)}
hypernetwork.save(filename) return hypernetwork, filename - - -- cgit v1.2.3 From 48dbf99e84045ee7af55bc5b1b86492a240e631e Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 04:17:16 +0900 Subject: Allow tracking real-time loss Someone had 6000 images in their dataset, and it was shown as 0, which was confusing. This will allow tracking real time dataset-average loss for registered objects. --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 81132be4..99fd0f8f 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -360,7 +360,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step - if loss_dict and i % size == 0: + if len(loss_dict) > 0: previous_mean_loss = sum(i[-1] for i in loss_dict.values()) / len(loss_dict) scheduler.apply(optimizer, hypernetwork.step) -- cgit v1.2.3 From 1fbfc052eb529d8cf8ce5baf578bcf93d0280c29 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 23 Oct 2022 05:43:34 +0100 Subject: Update hypernetwork.py --- modules/hypernetworks/hypernetwork.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 99fd0f8f..98a7b62e 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -288,10 +288,13 @@ def statistics(data): def report_statistics(loss_info:dict): keys = sorted(loss_info.keys(), key=lambda x: sum(loss_info[x]) / len(loss_info[x])) for key in keys: - info, recent = statistics(loss_info[key]) - print("Loss statistics for file " + key) - print(info) - print(recent) + try: + print("Loss statistics for file " + key) + info, recent = statistics(loss_info[key]) + print(info) + print(recent) + except Exception as e: + print(e) -- cgit v1.2.3 From b297cc3324979ec78d69b2d11dd18030dfad7bcc Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 20:06:42 +0900 Subject: Hypernetworks - fix KeyError in statistics caching Statistics logging has changed to {filename : list[losses]}, so it has to use loss_info[key].pop() --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 98a7b62e..33827210 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -274,8 +274,8 @@ def log_statistics(loss_info:dict, key, value): loss_info[key] = [value] else: loss_info[key].append(value) - if len(loss_info) > 1024: - loss_info.pop(0) + if len(loss_info[key]) > 1024: + loss_info[key].pop(0) def statistics(data): -- cgit v1.2.3 From 40b56c9289bf9458ae5ef3c1990ccea851c6c3e2 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:07:07 +0900 Subject: cleanup some code --- modules/hypernetworks/hypernetwork.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 33827210..4072bf54 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -16,6 +16,7 @@ from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum +from collections import defaultdict, deque from statistics import stdev, mean class HypernetworkModule(torch.nn.Module): @@ -269,15 +270,6 @@ def stack_conds(conds): return torch.stack(conds) -def log_statistics(loss_info:dict, key, value): - if key not in loss_info: - loss_info[key] = [value] - else: - loss_info[key].append(value) - if len(loss_info[key]) > 1024: - loss_info[key].pop(0) - - def statistics(data): total_information = f"loss:{mean(data):.3f}"+u"\u00B1"+f"({stdev(data)/ (len(data)**0.5):.3f})" recent_data = data[-32:] @@ -341,7 +333,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log weight.requires_grad = True size = len(ds.indexes) - loss_dict = {} + loss_dict = defaultdict(lambda : deque(maxlen = 1024)) losses = torch.zeros((size,)) previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) @@ -383,7 +375,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() for entry in entries: - log_statistics(loss_dict, entry.filename, loss.item()) + loss_dict[entry.filename].append(loss.item()) optimizer.zero_grad() weights[0].grad = None -- cgit v1.2.3 From 348f89c8d40397c1875cff4a7331018785f9c3b8 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:29:53 +0900 Subject: statistics for pbar --- modules/hypernetworks/hypernetwork.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 4072bf54..48b56029 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -335,6 +335,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log size = len(ds.indexes) loss_dict = defaultdict(lambda : deque(maxlen = 1024)) losses = torch.zeros((size,)) + previous_mean_losses = [0] previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) @@ -356,7 +357,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log for i, entries in pbar: hypernetwork.step = i + ititial_step if len(loss_dict) > 0: - previous_mean_loss = sum(i[-1] for i in loss_dict.values()) / len(loss_dict) + previous_mean_losses = [i[-1] for i in loss_dict.values()] + previous_mean_loss = mean(previous_mean_losses) scheduler.apply(optimizer, hypernetwork.step) if scheduler.finished: @@ -391,7 +393,13 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): raise RuntimeError("Loss diverged.") - pbar.set_description(f"dataset loss: {previous_mean_loss:.7f}") + + if len(previous_mean_losses) > 1: + std = stdev(previous_mean_losses) + else: + std = 0 + dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" + pbar.set_description(dataset_loss_info) if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. -- cgit v1.2.3 From 0d2e1dac407a0e2f5b148d314715f0457b2525b7 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:41:39 +0900 Subject: convert deque -> list I don't feel this being efficient --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 48b56029..fb510fa7 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -282,7 +282,7 @@ def report_statistics(loss_info:dict): for key in keys: try: print("Loss statistics for file " + key) - info, recent = statistics(loss_info[key]) + info, recent = statistics(list(loss_info[key])) print(info) print(recent) except Exception as e: -- cgit v1.2.3 From e9a410b5357612f63528015c5533c2185dcff92e Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:47:39 +0900 Subject: check length for variance --- modules/hypernetworks/hypernetwork.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index fb510fa7..d647ea55 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -271,9 +271,17 @@ def stack_conds(conds): def statistics(data): - total_information = f"loss:{mean(data):.3f}"+u"\u00B1"+f"({stdev(data)/ (len(data)**0.5):.3f})" + if len(data) < 2: + std = 0 + else: + std = stdev(data) + total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std/ (len(data) ** 0.5):.3f})" recent_data = data[-32:] - recent_information = f"recent 32 loss:{mean(recent_data):.3f}"+u"\u00B1"+f"({stdev(recent_data)/ (len(recent_data)**0.5):.3f})" + if len(recent_data) < 2: + std = 0 + else: + std = stdev(recent_data) + recent_information = f"recent 32 loss:{mean(recent_data):.3f}" + u"\u00B1" + f"({std / (len(recent_data) ** 0.5):.3f})" return total_information, recent_information -- cgit v1.2.3 From 2f4c91894d4c0a055c1069b2fda0e4da8fcda188 Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 12:10:30 +0800 Subject: Remove activation from final layer of HNs --- modules/hypernetworks/hypernetwork.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index d647ea55..54346b64 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -41,8 +41,8 @@ class HypernetworkModule(torch.nn.Module): # Add a fully-connected layer linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - # Add an activation func - if activation_func == "linear" or activation_func is None: + # Add an activation func except last layer + if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 3: pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) @@ -53,7 +53,7 @@ class HypernetworkModule(torch.nn.Module): if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - # Add dropout expect last layer + # Add dropout except last layer if use_dropout and i < len(layer_structure) - 3: linears.append(torch.nn.Dropout(p=0.3)) -- cgit v1.2.3 From c702d4d0df21790199d199818f25c449213ffe0f Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 13:43:04 +0800 Subject: Fix off-by-one --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 54346b64..3ce85bb5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -42,7 +42,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # Add an activation func except last layer - if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 3: + if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 2: pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) @@ -54,7 +54,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if use_dropout and i < len(layer_structure) - 3: + if use_dropout and i < len(layer_structure) - 2: linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) -- cgit v1.2.3 From de096d0ce752c96e45508dcc7b9e84f7dbe10cca Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Tue, 25 Oct 2022 14:48:49 +0900 Subject: Weight initialization and More activation func add weight init add weight init option in create_hypernetwork fstringify hypernet info save weight initialization info for further debugging fill bias with zero for He/Xavier initialize LayerNorm with Normal fix loading weight_init --- modules/hypernetworks/hypernetwork.py | 47 ++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index d647ea55..afbcdff8 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -5,6 +5,7 @@ import html import os import sys import traceback +import inspect import modules.textual_inversion.dataset import torch @@ -15,10 +16,12 @@ from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum +from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_ from collections import defaultdict, deque from statistics import stdev, mean + class HypernetworkModule(torch.nn.Module): multiplier = 1.0 activation_dict = { @@ -26,9 +29,12 @@ class HypernetworkModule(torch.nn.Module): "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, "swish": torch.nn.Hardswish, + "tanh": torch.nn.Tanh, + "sigmoid": torch.nn.Sigmoid, } + activation_dict.update({cls_name: cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) - def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -65,9 +71,24 @@ class HypernetworkModule(torch.nn.Module): else: for layer in self.linear: if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: - layer.weight.data.normal_(mean=0.0, std=0.01) - layer.bias.data.zero_() - + w, b = layer.weight.data, layer.bias.data + if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm: + normal_(w, mean=0.0, std=0.01) + normal_(b, mean=0.0, std=0.005) + elif weight_init == 'XavierUniform': + xavier_uniform_(w) + zeros_(b) + elif weight_init == 'XavierNormal': + xavier_normal_(w) + zeros_(b) + elif weight_init == 'KaimingUniform': + kaiming_uniform_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + elif weight_init == 'KaimingNormal': + kaiming_normal_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + else: + raise KeyError(f"Key {weight_init} is not defined as initialization!") self.to(devices.device) def fix_old_state_dict(self, state_dict): @@ -105,7 +126,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False): self.filename = None self.name = name self.layers = {} @@ -114,13 +135,14 @@ class Hypernetwork: self.sd_checkpoint_name = None self.layer_structure = layer_structure self.activation_func = activation_func + self.weight_init = weight_init self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), ) def weights(self): @@ -144,6 +166,7 @@ class Hypernetwork: state_dict['layer_structure'] = self.layer_structure state_dict['activation_func'] = self.activation_func state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['weight_initialization'] = self.weight_init state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -158,15 +181,21 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) + print(self.layer_structure) self.activation_func = state_dict.get('activation_func', None) + print(f"Activation function is {self.activation_func}") + self.weight_init = state_dict.get('weight_initialization', 'Normal') + print(f"Weight initialization is {self.weight_init}") self.add_layer_norm = state_dict.get('is_layer_norm', False) + print(f"Layer norm is set to {self.add_layer_norm}") self.use_dropout = state_dict.get('use_dropout', False) + print(f"Dropout usage is set to {self.use_dropout}" ) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.3 From 7207e3bf49ed000464d288cd67e02f0ba8614dc3 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Tue, 25 Oct 2022 15:24:59 +0900 Subject: remove duplicate keys and lowercase --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index afbcdff8..842b6447 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -32,7 +32,7 @@ class HypernetworkModule(torch.nn.Module): "tanh": torch.nn.Tanh, "sigmoid": torch.nn.Sigmoid, } - activation_dict.update({cls_name: cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) + activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False): super().__init__() -- cgit v1.2.3 From 877d94f97ca5491d8779440769b191e0dcd32c8e Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 14:50:58 +0800 Subject: Back compatibility --- modules/hypernetworks/hypernetwork.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3ce85bb5..dd317085 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -28,7 +28,7 @@ class HypernetworkModule(torch.nn.Module): "swish": torch.nn.Hardswish, } - def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False, activate_output=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -42,7 +42,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # Add an activation func except last layer - if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 2: + if activation_func == "linear" or activation_func is None or (i >= len(layer_structure) - 2 and not activate_output): pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) @@ -105,7 +105,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False, activate_output=False): self.filename = None self.name = name self.layers = {} @@ -116,11 +116,12 @@ class Hypernetwork: self.activation_func = activation_func self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout + self.activate_output = activate_output for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), ) def weights(self): @@ -147,6 +148,7 @@ class Hypernetwork: state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name + state_dict['activate_output'] = self.activate_output torch.save(state_dict, filename) @@ -161,12 +163,13 @@ class Hypernetwork: self.activation_func = state_dict.get('activation_func', None) self.add_layer_norm = state_dict.get('is_layer_norm', False) self.use_dropout = state_dict.get('use_dropout', False) + self.activate_output = state_dict.get('activate_output', True) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.3 From 91bb35b1e6842b30ce7553009c8ecea3643de8d2 Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 15:00:03 +0800 Subject: Merge fix --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index eab8b32f..bd171793 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -190,7 +190,7 @@ class Hypernetwork: print(f"Weight initialization is {self.weight_init}") self.add_layer_norm = state_dict.get('is_layer_norm', False) print(f"Layer norm is set to {self.add_layer_norm}") - self.use_dropout = state_dict.get('use_dropout', False + self.use_dropout = state_dict.get('use_dropout', False) print(f"Dropout usage is set to {self.use_dropout}" ) self.activate_output = state_dict.get('activate_output', True) -- cgit v1.2.3 From a524d137d0a89bb19a6676dc9b8fbb5d1b580678 Mon Sep 17 00:00:00 2001 From: timntorres Date: Mon, 24 Oct 2022 23:48:05 -0700 Subject: patch bug (SeverianVoid's comment on 5245c7a) --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 842b6447..8113b35b 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -487,7 +487,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if image is not None: shared.state.current_image = image - last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename) + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) last_saved_image += f", prompt: {preview_text}" shared.state.job_no = hypernetwork.step -- cgit v1.2.3 From b6a8bb123bd519736306417399f6441e504f1e8b Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 15:15:19 +0800 Subject: Fix merge --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index bd171793..2997cead 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -60,7 +60,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if use_dropout and i < len(layer_structure) - 2: + if use_dropout and i < len(layer_structure) - 3: linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) @@ -126,7 +126,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False) + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False): self.filename = None self.name = name self.layers = {} -- cgit v1.2.3 From 85fcccc105aa50f1d78de559233eaa9f384608b5 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Wed, 26 Oct 2022 22:24:33 +0900 Subject: Squashed commit of fixing dropout silently fix dropouts for future hypernetworks add kwargs for Hypernetwork class hypernet UI for gradio input add recommended options remove as options revert adding options in ui --- modules/hypernetworks/hypernetwork.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 2997cead..dd921153 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -34,7 +34,8 @@ class HypernetworkModule(torch.nn.Module): } activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) - def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False, activate_output=False): + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', + add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -60,7 +61,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if use_dropout and i < len(layer_structure) - 3: + if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2: linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) @@ -126,7 +127,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): self.filename = None self.name = name self.layers = {} @@ -139,11 +140,14 @@ class Hypernetwork: self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout self.activate_output = activate_output + self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), ) def weights(self): @@ -172,7 +176,8 @@ class Hypernetwork: state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name state_dict['activate_output'] = self.activate_output - + state_dict['last_layer_dropout'] = self.last_layer_dropout + torch.save(state_dict, filename) def load(self, filename): @@ -193,12 +198,16 @@ class Hypernetwork: self.use_dropout = state_dict.get('use_dropout', False) print(f"Dropout usage is set to {self.use_dropout}" ) self.activate_output = state_dict.get('activate_output', True) + print(f"Activate last layer is set to {self.activate_output}") + self.last_layer_dropout = state_dict.get('last_layer_dropout', False) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), - HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.3 From cc56df996e95c2c82295ab7b9928da2544791220 Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 23:51:51 +0800 Subject: Fix dropout logic --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index dd921153..b17598fe 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -35,7 +35,7 @@ class HypernetworkModule(torch.nn.Module): activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', - add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): + add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=True): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -61,7 +61,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2: + if use_dropout and (i < len(layer_structure) - 3 or last_layer_dropout and i < len(layer_structure) - 2): linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) -- cgit v1.2.3 From 029d7c75436558f1e884bb127caed73caaecb83a Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:44:53 +0900 Subject: Revert unresolved changes in Bias initialization it should be zeros_ or parameterized in future properly. --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b17598fe..25427a37 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -75,7 +75,7 @@ class HypernetworkModule(torch.nn.Module): w, b = layer.weight.data, layer.bias.data if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm: normal_(w, mean=0.0, std=0.01) - normal_(b, mean=0.0, std=0.005) + normal_(b, mean=0.0, std=0) elif weight_init == 'XavierUniform': xavier_uniform_(w) zeros_(b) -- cgit v1.2.3 From 2a25729623717cc499e873752d9f4ebebd1e1078 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Fri, 28 Oct 2022 09:44:56 +0700 Subject: Gradient clipping in train tab --- modules/hypernetworks/hypernetwork.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8113b35b..c5d60654 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -327,7 +327,7 @@ def report_statistics(loss_info:dict): -def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images @@ -384,6 +384,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if ititial_step > steps: return hypernetwork, filename + clip_grad_mode_value = clip_grad_mode == "value" + clip_grad_mode_norm = clip_grad_mode == "norm" + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) @@ -426,6 +429,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log steps_without_grad = 0 assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' + if clip_grad_mode_value: + torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_value) + elif clip_grad_mode_norm: + torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_value) + optimizer.step() if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): -- cgit v1.2.3 From b2a8b263b2f09bd772f75502c5a83656580f34ec Mon Sep 17 00:00:00 2001 From: benkyoujouzu Date: Thu, 27 Oct 2022 13:00:47 +0800 Subject: Add missing support for linear activation in hypernetwork --- modules/hypernetworks/hypernetwork.py | 1 + 1 file changed, 1 insertion(+) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8113b35b..87cf3cf3 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -25,6 +25,7 @@ from statistics import stdev, mean class HypernetworkModule(torch.nn.Module): multiplier = 1.0 activation_dict = { + "linear": torch.nn.Identity, "relu": torch.nn.ReLU, "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, -- cgit v1.2.3 From db5a354c489bfd1c95e0bbf9af12ab8b5d6fe170 Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 28 Oct 2022 01:41:57 -0700 Subject: Always ignore "None.pt" in the hypernet directory. --- modules/hypernetworks/hypernetwork.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8113b35b..cd920df5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -208,13 +208,16 @@ def list_hypernetworks(path): res = {} for filename in glob.iglob(os.path.join(path, '**/*.pt'), recursive=True): name = os.path.splitext(os.path.basename(filename))[0] - res[name] = filename + # Prevent a hypothetical "None.pt" from being listed. + if name != "None": + res[name] = filename return res def load_hypernetwork(filename): path = shared.hypernetworks.get(filename, None) - if path is not None: + # Prevent any file named "None.pt" from being loaded. + if path is not None and filename != "None": print(f"Loading hypernetwork {filename}") try: shared.loaded_hypernetwork = Hypernetwork() -- cgit v1.2.3 From 16451ca573220e49f2eaaab97580b6b91287c8c4 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Fri, 28 Oct 2022 17:16:23 +0700 Subject: Learning rate sched syntax support for grad clipping --- modules/hypernetworks/hypernetwork.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index c5d60654..86532063 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -383,11 +383,15 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log ititial_step = hypernetwork.step or 0 if ititial_step > steps: return hypernetwork, filename - + clip_grad_mode_value = clip_grad_mode == "value" clip_grad_mode_norm = clip_grad_mode == "norm" + clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm + if clip_grad_enabled: + clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False) scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) @@ -407,6 +411,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if shared.state.interrupted: break + if clip_grad_enabled: + clip_grad_sched.step(hypernetwork.step) + with torch.autocast("cuda"): c = stack_conds([entry.cond for entry in entries]).to(devices.device) # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) @@ -430,9 +437,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' if clip_grad_mode_value: - torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_value) + torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_sched.learn_rate) elif clip_grad_mode_norm: - torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_value) + torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_sched.learn_rate) optimizer.step() -- cgit v1.2.3 From 9ceef81f77ecce89f0c8f412c4d849210d852e82 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Fri, 28 Oct 2022 20:48:08 +0700 Subject: Fix log off by 1 --- modules/hypernetworks/hypernetwork.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8113b35b..a0297997 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -428,7 +428,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer.step() - if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): + steps_done = hypernetwork.step + 1 + + if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): raise RuntimeError("Loss diverged.") if len(previous_mean_losses) > 1: @@ -438,9 +440,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" pbar.set_description(dataset_loss_info) - if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: + if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. - hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}' + hypernetwork.name = f'{hypernetwork_name}-{steps_done}' last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') hypernetwork.save(last_saved_file) @@ -449,8 +451,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log "learn_rate": scheduler.learn_rate }) - if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: - forced_filename = f'{hypernetwork_name}-{hypernetwork.step}' + if images_dir is not None and steps_done % create_image_every == 0: + forced_filename = f'{hypernetwork_name}-{steps_done}' last_saved_image = os.path.join(images_dir, forced_filename) optimizer.zero_grad() -- cgit v1.2.3 From ab27c111d06ec920791c73eea25ad9a61671852e Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sat, 29 Oct 2022 18:09:17 +0700 Subject: Add input validations before loading dataset for training --- modules/hypernetworks/hypernetwork.py | 38 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 2e84583b..38f35c58 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -332,7 +332,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images - assert hypernetwork_name, 'hypernetwork not selected' + save_hypernetwork_every = save_hypernetwork_every or 0 + create_image_every = create_image_every or 0 + textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, data_root, template_file, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork") path = shared.hypernetworks.get(hypernetwork_name, None) shared.loaded_hypernetwork = Hypernetwork() @@ -358,39 +360,43 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log else: images_dir = None + hypernetwork = shared.loaded_hypernetwork + + ititial_step = hypernetwork.step or 0 + if ititial_step > steps: + shared.state.textinfo = f"Model has already been trained beyond specified max steps" + return hypernetwork, filename + + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + + # dataset loading may take a while, so input validations and early returns should be done before this shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size) + if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) - hypernetwork = shared.loaded_hypernetwork - weights = hypernetwork.weights() - for weight in weights: - weight.requires_grad = True - size = len(ds.indexes) loss_dict = defaultdict(lambda : deque(maxlen = 1024)) losses = torch.zeros((size,)) previous_mean_losses = [0] previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) - - last_saved_file = "" - last_saved_image = "" - forced_filename = "" - - ititial_step = hypernetwork.step or 0 - if ititial_step > steps: - return hypernetwork, filename - - scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + + weights = hypernetwork.weights() + for weight in weights: + weight.requires_grad = True # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) steps_without_grad = 0 + last_saved_file = "" + last_saved_image = "" + forced_filename = "" + pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step -- cgit v1.2.3 From 3ce2bfdf95bd5f26d0f6e250e67338ada91980d1 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sat, 29 Oct 2022 19:43:21 +0700 Subject: Add cleanup after training --- modules/hypernetworks/hypernetwork.py | 201 ++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 96 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 38f35c58..170d5ea4 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -398,110 +398,112 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log forced_filename = "" pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - for i, entries in pbar: - hypernetwork.step = i + ititial_step - if len(loss_dict) > 0: - previous_mean_losses = [i[-1] for i in loss_dict.values()] - previous_mean_loss = mean(previous_mean_losses) - - scheduler.apply(optimizer, hypernetwork.step) - if scheduler.finished: - break - - if shared.state.interrupted: - break - - with torch.autocast("cuda"): - c = stack_conds([entry.cond for entry in entries]).to(devices.device) - # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) - x = torch.stack([entry.latent for entry in entries]).to(devices.device) - loss = shared.sd_model(x, c)[0] - del x - del c - - losses[hypernetwork.step % losses.shape[0]] = loss.item() - for entry in entries: - loss_dict[entry.filename].append(loss.item()) - - optimizer.zero_grad() - weights[0].grad = None - loss.backward() - if weights[0].grad is None: - steps_without_grad += 1 + try: + for i, entries in pbar: + hypernetwork.step = i + ititial_step + if len(loss_dict) > 0: + previous_mean_losses = [i[-1] for i in loss_dict.values()] + previous_mean_loss = mean(previous_mean_losses) + + scheduler.apply(optimizer, hypernetwork.step) + if scheduler.finished: + break + + if shared.state.interrupted: + break + + with torch.autocast("cuda"): + c = stack_conds([entry.cond for entry in entries]).to(devices.device) + # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) + x = torch.stack([entry.latent for entry in entries]).to(devices.device) + loss = shared.sd_model(x, c)[0] + del x + del c + + losses[hypernetwork.step % losses.shape[0]] = loss.item() + for entry in entries: + loss_dict[entry.filename].append(loss.item()) + + optimizer.zero_grad() + weights[0].grad = None + loss.backward() + + if weights[0].grad is None: + steps_without_grad += 1 + else: + steps_without_grad = 0 + assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' + + optimizer.step() + + steps_done = hypernetwork.step + 1 + + if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): + raise RuntimeError("Loss diverged.") + + if len(previous_mean_losses) > 1: + std = stdev(previous_mean_losses) else: - steps_without_grad = 0 - assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' - - optimizer.step() - - steps_done = hypernetwork.step + 1 - - if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): - raise RuntimeError("Loss diverged.") - - if len(previous_mean_losses) > 1: - std = stdev(previous_mean_losses) - else: - std = 0 - dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" - pbar.set_description(dataset_loss_info) - - if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: - # Before saving, change name to match current checkpoint. - hypernetwork.name = f'{hypernetwork_name}-{steps_done}' - last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') - hypernetwork.save(last_saved_file) - - textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { - "loss": f"{previous_mean_loss:.7f}", - "learn_rate": scheduler.learn_rate - }) - - if images_dir is not None and steps_done % create_image_every == 0: - forced_filename = f'{hypernetwork_name}-{steps_done}' - last_saved_image = os.path.join(images_dir, forced_filename) - - optimizer.zero_grad() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) + std = 0 + dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" + pbar.set_description(dataset_loss_info) + + if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: + # Before saving, change name to match current checkpoint. + hypernetwork.name = f'{hypernetwork_name}-{steps_done}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') + hypernetwork.save(last_saved_file) + + textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { + "loss": f"{previous_mean_loss:.7f}", + "learn_rate": scheduler.learn_rate + }) + + if images_dir is not None and steps_done % create_image_every == 0: + forced_filename = f'{hypernetwork_name}-{steps_done}' + last_saved_image = os.path.join(images_dir, forced_filename) + + optimizer.zero_grad() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_index = preview_sampler_index - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entries[0].cond_text + p.steps = 20 - preview_text = p.prompt + preview_text = p.prompt - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) - if image is not None: - shared.state.current_image = image - last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) - last_saved_image += f", prompt: {preview_text}" + if image is not None: + shared.state.current_image = image + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) + last_saved_image += f", prompt: {preview_text}" - shared.state.job_no = hypernetwork.step + shared.state.job_no = hypernetwork.step - shared.state.textinfo = f""" + shared.state.textinfo = f"""

Loss: {previous_mean_loss:.7f}
Step: {hypernetwork.step}
@@ -510,7 +512,14 @@ Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" - + finally: + if weights: + for weight in weights: + weight.requires_grad = False + if unload: + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + report_statistics(loss_dict) checkpoint = sd_models.select_checkpoint() -- cgit v1.2.3 From ab05a74ead9fabb45dd099990e34061c7eb02ca3 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sun, 30 Oct 2022 00:32:02 +0700 Subject: Revert "Add cleanup after training" This reverts commit 3ce2bfdf95bd5f26d0f6e250e67338ada91980d1. --- modules/hypernetworks/hypernetwork.py | 201 ++++++++++++++++------------------ 1 file changed, 96 insertions(+), 105 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 170d5ea4..38f35c58 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -398,112 +398,110 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log forced_filename = "" pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - - try: - for i, entries in pbar: - hypernetwork.step = i + ititial_step - if len(loss_dict) > 0: - previous_mean_losses = [i[-1] for i in loss_dict.values()] - previous_mean_loss = mean(previous_mean_losses) - - scheduler.apply(optimizer, hypernetwork.step) - if scheduler.finished: - break - - if shared.state.interrupted: - break - - with torch.autocast("cuda"): - c = stack_conds([entry.cond for entry in entries]).to(devices.device) - # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) - x = torch.stack([entry.latent for entry in entries]).to(devices.device) - loss = shared.sd_model(x, c)[0] - del x - del c - - losses[hypernetwork.step % losses.shape[0]] = loss.item() - for entry in entries: - loss_dict[entry.filename].append(loss.item()) - - optimizer.zero_grad() - weights[0].grad = None - loss.backward() - - if weights[0].grad is None: - steps_without_grad += 1 - else: - steps_without_grad = 0 - assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' - - optimizer.step() - - steps_done = hypernetwork.step + 1 - - if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): - raise RuntimeError("Loss diverged.") + for i, entries in pbar: + hypernetwork.step = i + ititial_step + if len(loss_dict) > 0: + previous_mean_losses = [i[-1] for i in loss_dict.values()] + previous_mean_loss = mean(previous_mean_losses) - if len(previous_mean_losses) > 1: - std = stdev(previous_mean_losses) + scheduler.apply(optimizer, hypernetwork.step) + if scheduler.finished: + break + + if shared.state.interrupted: + break + + with torch.autocast("cuda"): + c = stack_conds([entry.cond for entry in entries]).to(devices.device) + # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) + x = torch.stack([entry.latent for entry in entries]).to(devices.device) + loss = shared.sd_model(x, c)[0] + del x + del c + + losses[hypernetwork.step % losses.shape[0]] = loss.item() + for entry in entries: + loss_dict[entry.filename].append(loss.item()) + + optimizer.zero_grad() + weights[0].grad = None + loss.backward() + + if weights[0].grad is None: + steps_without_grad += 1 else: - std = 0 - dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" - pbar.set_description(dataset_loss_info) - - if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: - # Before saving, change name to match current checkpoint. - hypernetwork.name = f'{hypernetwork_name}-{steps_done}' - last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') - hypernetwork.save(last_saved_file) - - textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { - "loss": f"{previous_mean_loss:.7f}", - "learn_rate": scheduler.learn_rate - }) - - if images_dir is not None and steps_done % create_image_every == 0: - forced_filename = f'{hypernetwork_name}-{steps_done}' - last_saved_image = os.path.join(images_dir, forced_filename) - - optimizer.zero_grad() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) + steps_without_grad = 0 + assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_index = preview_sampler_index - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 + optimizer.step() - preview_text = p.prompt + steps_done = hypernetwork.step + 1 - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None + if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): + raise RuntimeError("Loss diverged.") + + if len(previous_mean_losses) > 1: + std = stdev(previous_mean_losses) + else: + std = 0 + dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" + pbar.set_description(dataset_loss_info) + + if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: + # Before saving, change name to match current checkpoint. + hypernetwork.name = f'{hypernetwork_name}-{steps_done}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') + hypernetwork.save(last_saved_file) + + textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { + "loss": f"{previous_mean_loss:.7f}", + "learn_rate": scheduler.learn_rate + }) + + if images_dir is not None and steps_done % create_image_every == 0: + forced_filename = f'{hypernetwork_name}-{steps_done}' + last_saved_image = os.path.join(images_dir, forced_filename) + + optimizer.zero_grad() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) - if image is not None: - shared.state.current_image = image - last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) - last_saved_image += f", prompt: {preview_text}" + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entries[0].cond_text + p.steps = 20 + + preview_text = p.prompt + + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) - shared.state.job_no = hypernetwork.step + if image is not None: + shared.state.current_image = image + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) + last_saved_image += f", prompt: {preview_text}" - shared.state.textinfo = f""" + shared.state.job_no = hypernetwork.step + + shared.state.textinfo = f"""

Loss: {previous_mean_loss:.7f}
Step: {hypernetwork.step}
@@ -512,14 +510,7 @@ Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" - finally: - if weights: - for weight in weights: - weight.requires_grad = False - if unload: - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - + report_statistics(loss_dict) checkpoint = sd_models.select_checkpoint() -- cgit v1.2.3 From a07f054c86f33360ff620d6a3fffdee366ab2d99 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sun, 30 Oct 2022 00:49:29 +0700 Subject: Add missing info on hypernetwork/embedding model log Mentioned here: https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/1528#discussioncomment-3991513 Also group the saving into one --- modules/hypernetworks/hypernetwork.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 38f35c58..86daf825 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -361,6 +361,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log images_dir = None hypernetwork = shared.loaded_hypernetwork + checkpoint = sd_models.select_checkpoint() ititial_step = hypernetwork.step or 0 if ititial_step > steps: @@ -449,9 +450,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. - hypernetwork.name = f'{hypernetwork_name}-{steps_done}' - last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') - hypernetwork.save(last_saved_file) + hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt') + save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file) textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { "loss": f"{previous_mean_loss:.7f}", @@ -512,13 +513,23 @@ Last saved image: {html.escape(last_saved_image)}
""" report_statistics(loss_dict) - checkpoint = sd_models.select_checkpoint() - hypernetwork.sd_checkpoint = checkpoint.hash - hypernetwork.sd_checkpoint_name = checkpoint.model_name - # Before saving for the last time, change name back to the base name (as opposed to the save_hypernetwork_every step-suffixed naming convention). - hypernetwork.name = hypernetwork_name - filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork.name}.pt') - hypernetwork.save(filename) + filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') + save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename) return hypernetwork, filename + +def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename): + old_hypernetwork_name = hypernetwork.name + old_sd_checkpoint = hypernetwork.sd_checkpoint if hasattr(hypernetwork, "sd_checkpoint") else None + old_sd_checkpoint_name = hypernetwork.sd_checkpoint_name if hasattr(hypernetwork, "sd_checkpoint_name") else None + try: + hypernetwork.sd_checkpoint = checkpoint.hash + hypernetwork.sd_checkpoint_name = checkpoint.model_name + hypernetwork.name = hypernetwork_name + hypernetwork.save(filename) + except: + hypernetwork.sd_checkpoint = old_sd_checkpoint + hypernetwork.sd_checkpoint_name = old_sd_checkpoint_name + hypernetwork.name = old_hypernetwork_name + raise -- cgit v1.2.3 From 3d58510f214c645ce5cdb261aa47df6573b239e9 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sun, 30 Oct 2022 00:54:59 +0700 Subject: Fix dataset still being loaded even when training will be skipped --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 86daf825..07acadc9 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -364,7 +364,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log checkpoint = sd_models.select_checkpoint() ititial_step = hypernetwork.step or 0 - if ititial_step > steps: + if ititial_step >= steps: shared.state.textinfo = f"Model has already been trained beyond specified max steps" return hypernetwork, filename -- cgit v1.2.3 From 4123be632a98f70cda06e14c2f556f7ad38cd436 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Mon, 31 Oct 2022 13:53:22 +0700 Subject: Fix merge conflicts --- modules/hypernetworks/hypernetwork.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 65a584bb..207808ee 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -373,6 +373,12 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + clip_grad_mode_value = clip_grad_mode == "value" + clip_grad_mode_norm = clip_grad_mode == "norm" + clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm + if clip_grad_enabled: + clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False) + # dataset loading may take a while, so input validations and early returns should be done before this shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): @@ -389,21 +395,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) - last_saved_file = "" - last_saved_image = "" - forced_filename = "" - ititial_step = hypernetwork.step or 0 if ititial_step > steps: return hypernetwork, filename - clip_grad_mode_value = clip_grad_mode == "value" - clip_grad_mode_norm = clip_grad_mode == "norm" - clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm - if clip_grad_enabled: - clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False) - - scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) weights = hypernetwork.weights() for weight in weights: -- cgit v1.2.3 From d5ea878b2aa117588d85287cbd8983aa52177df5 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Mon, 31 Oct 2022 13:54:40 +0700 Subject: Fix merge conflicts --- modules/hypernetworks/hypernetwork.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 207808ee..2df38c70 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -395,11 +395,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) - ititial_step = hypernetwork.step or 0 - if ititial_step > steps: - return hypernetwork, filename - - weights = hypernetwork.weights() for weight in weights: weight.requires_grad = True -- cgit v1.2.3 From 283249d2390f0f3a1c8a55d5d9aa551e3e9b2f9c Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:57:17 +0900 Subject: apply --- modules/hypernetworks/hypernetwork.py | 54 +++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 6e1a10cf..de8688a9 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -22,6 +22,8 @@ from collections import defaultdict, deque from statistics import stdev, mean +optimizer_dict = {optim_name : cls_obj for optim_name, cls_obj in inspect.getmembers(torch.optim, inspect.isclass) if optim_name != "Optimizer"} + class HypernetworkModule(torch.nn.Module): multiplier = 1.0 activation_dict = { @@ -142,6 +144,8 @@ class Hypernetwork: self.use_dropout = use_dropout self.activate_output = activate_output self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True + self.optimizer_name = None + self.optimizer_state_dict = None for size in enable_sizes or []: self.layers[size] = ( @@ -163,6 +167,7 @@ class Hypernetwork: def save(self, filename): state_dict = {} + optimizer_saved_dict = {} for k, v in self.layers.items(): state_dict[k] = (v[0].state_dict(), v[1].state_dict()) @@ -178,8 +183,15 @@ class Hypernetwork: state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name state_dict['activate_output'] = self.activate_output state_dict['last_layer_dropout'] = self.last_layer_dropout - + + if self.optimizer_name is not None: + optimizer_saved_dict['optimizer_name'] = self.optimizer_name + torch.save(state_dict, filename) + if self.optimizer_state_dict: + optimizer_saved_dict['hash'] = sd_models.model_hash(filename) + optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict + torch.save(optimizer_saved_dict, filename + '.optim') def load(self, filename): self.filename = filename @@ -202,6 +214,18 @@ class Hypernetwork: print(f"Activate last layer is set to {self.activate_output}") self.last_layer_dropout = state_dict.get('last_layer_dropout', False) + optimizer_saved_dict = torch.load(self.filename + '.optim', map_location = 'cpu') if os.path.exists(self.filename + '.optim') else {} + self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW') + print(f"Optimizer name is {self.optimizer_name}") + if sd_models.model_hash(filename) == optimizer_saved_dict.get('hash', None): + self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None) + else: + self.optimizer_state_dict = None + if self.optimizer_state_dict: + print("Loaded existing optimizer from checkpoint") + else: + print("No saved optimizer exists in checkpoint") + for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( @@ -223,7 +247,7 @@ def list_hypernetworks(path): name = os.path.splitext(os.path.basename(filename))[0] # Prevent a hypothetical "None.pt" from being listed. if name != "None": - res[name] = filename + res[name + f"({sd_models.model_hash(filename)})"] = filename return res @@ -369,6 +393,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log else: hypernetwork_dir = None + hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0] if create_image_every > 0: images_dir = os.path.join(log_directory, "images") os.makedirs(images_dir, exist_ok=True) @@ -404,8 +429,19 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log weights = hypernetwork.weights() for weight in weights: weight.requires_grad = True - # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... - optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + # Here we use optimizer from saved HN, or we can specify as UI option. + if (optimizer_name := hypernetwork.optimizer_name) in optimizer_dict: + optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate) + else: + print(f"Optimizer type {optimizer_name} is not defined!") + optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) + optimizer_name = 'AdamW' + if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer. + try: + optimizer.load_state_dict(hypernetwork.optimizer_state_dict) + except RuntimeError as e: + print("Cannot resume from saved optimizer!") + print(e) steps_without_grad = 0 @@ -467,7 +503,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log # Before saving, change name to match current checkpoint. hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}' last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt') + hypernetwork.optimizer_name = optimizer_name + if shared.opts.save_optimizer_state: + hypernetwork.optimizer_state_dict = optimizer.state_dict() save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file) + hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { "loss": f"{previous_mean_loss:.7f}", @@ -530,8 +570,12 @@ Last saved image: {html.escape(last_saved_image)}
report_statistics(loss_dict) filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') + hypernetwork.optimizer_name = optimizer_name + if shared.opts.save_optimizer_state: + hypernetwork.optimizer_state_dict = optimizer.state_dict() save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename) - + del optimizer + hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. return hypernetwork, filename def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename): -- cgit v1.2.3 From f5d394214d6ee74a682d0a1016bcbebc4b43c13a Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 4 Nov 2022 16:04:03 +0900 Subject: split before declaring file name --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index de8688a9..9b6a3e62 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -382,6 +382,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.state.textinfo = "Initializing hypernetwork training..." shared.state.job_count = steps + hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0] filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name) @@ -393,7 +394,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log else: hypernetwork_dir = None - hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0] if create_image_every > 0: images_dir = os.path.join(log_directory, "images") os.makedirs(images_dir, exist_ok=True) -- cgit v1.2.3 From 1ca0bcd3a7003dd2c1324de7d97fd2a6fc5ddc53 Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 4 Nov 2022 16:09:19 +0900 Subject: only save if option is enabled --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 9b6a3e62..b1f308e2 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -188,7 +188,7 @@ class Hypernetwork: optimizer_saved_dict['optimizer_name'] = self.optimizer_name torch.save(state_dict, filename) - if self.optimizer_state_dict: + if shared.opts.save_optimizer_state and self.optimizer_state_dict: optimizer_saved_dict['hash'] = sd_models.model_hash(filename) optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict torch.save(optimizer_saved_dict, filename + '.optim') -- cgit v1.2.3 From 39541d7725bc42f456a604b07c50aba503a5a09a Mon Sep 17 00:00:00 2001 From: Fampai <> Date: Fri, 4 Nov 2022 04:50:22 -0400 Subject: Fixes race condition in training when VAE is unloaded set_current_image can attempt to use the VAE when it is unloaded to the CPU while training --- modules/hypernetworks/hypernetwork.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 6e1a10cf..fcb96059 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -390,7 +390,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log with torch.autocast("cuda"): ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size) + old_parallel_processing_allowed = shared.parallel_processing_allowed + if unload: + shared.parallel_processing_allowed = False shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) @@ -531,6 +534,7 @@ Last saved image: {html.escape(last_saved_image)}
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename) + shared.parallel_processing_allowed = old_parallel_processing_allowed return hypernetwork, filename -- cgit v1.2.3 From fd62727893f9face287b0a9620251afaa38a627d Mon Sep 17 00:00:00 2001 From: Isaac Poulton Date: Fri, 4 Nov 2022 18:34:35 +0700 Subject: Sort hypernetworks --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 6e1a10cf..f1f04a70 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -224,7 +224,7 @@ def list_hypernetworks(path): # Prevent a hypothetical "None.pt" from being listed. if name != "None": res[name] = filename - return res + return dict(sorted(res.items())) def load_hypernetwork(filename): -- cgit v1.2.3 From 08feb4c364e8b2aed929fd7d22dfa21a93d78b2c Mon Sep 17 00:00:00 2001 From: Isaac Poulton Date: Fri, 4 Nov 2022 20:53:11 +0700 Subject: Sort straight out of the glob --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index f1f04a70..a441ab10 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -219,12 +219,12 @@ class Hypernetwork: def list_hypernetworks(path): res = {} - for filename in glob.iglob(os.path.join(path, '**/*.pt'), recursive=True): + for filename in sorted(glob.iglob(os.path.join(path, '**/*.pt'), recursive=True)): name = os.path.splitext(os.path.basename(filename))[0] # Prevent a hypothetical "None.pt" from being listed. if name != "None": res[name] = filename - return dict(sorted(res.items())) + return res def load_hypernetwork(filename): -- cgit v1.2.3 From bb832d7725187f8a8ab44faa6ee1b38cb5f600aa Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sat, 5 Nov 2022 11:48:38 +0700 Subject: Simplify grad clip --- modules/hypernetworks/hypernetwork.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index f4c2668f..02b624e1 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -385,10 +385,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - clip_grad_mode_value = clip_grad_mode == "value" - clip_grad_mode_norm = clip_grad_mode == "norm" - clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm - if clip_grad_enabled: + clip_grad = torch.nn.utils.clip_grad_value_ if clip_grad_mode == "value" else \ + torch.nn.utils.clip_grad_norm_ if clip_grad_mode == "norm" else \ + None + if clip_grad: clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False) # dataset loading may take a while, so input validations and early returns should be done before this @@ -433,7 +433,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if shared.state.interrupted: break - if clip_grad_enabled: + if clip_grad: clip_grad_sched.step(hypernetwork.step) with torch.autocast("cuda"): @@ -458,10 +458,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log steps_without_grad = 0 assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' - if clip_grad_mode_value: - torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_sched.learn_rate) - elif clip_grad_mode_norm: - torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_sched.learn_rate) + if clip_grad: + clip_grad(weights, clip_grad_sched.learn_rate) optimizer.step() -- cgit v1.2.3 From 62e3d71aa778928d63cab81d9d8cde33e55bebb3 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 5 Nov 2022 17:09:42 +0300 Subject: rework the code to not use the walrus operator because colab's 3.7 does not support it --- modules/hypernetworks/hypernetwork.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 5ceed6ee..7f182712 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -429,13 +429,16 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log weights = hypernetwork.weights() for weight in weights: weight.requires_grad = True + # Here we use optimizer from saved HN, or we can specify as UI option. - if (optimizer_name := hypernetwork.optimizer_name) in optimizer_dict: + if hypernetwork.optimizer_name in optimizer_dict: optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate) + optimizer_name = hypernetwork.optimizer_name else: - print(f"Optimizer type {optimizer_name} is not defined!") + print(f"Optimizer type {hypernetwork.optimizer_name} is not defined!") optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) optimizer_name = 'AdamW' + if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer. try: optimizer.load_state_dict(hypernetwork.optimizer_state_dict) -- cgit v1.2.3 From cdc8020d13c5eef099c609b0a911ccf3568afc0d Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 19 Nov 2022 12:01:51 +0300 Subject: change StableDiffusionProcessing to internally use sampler name instead of sampler index --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7f182712..fbb87dd1 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -12,7 +12,7 @@ import torch import tqdm from einops import rearrange, repeat from ldm.util import default -from modules import devices, processing, sd_models, shared +from modules import devices, processing, sd_models, shared, sd_samplers from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum @@ -535,7 +535,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log p.prompt = preview_prompt p.negative_prompt = preview_negative_prompt p.steps = preview_steps - p.sampler_index = preview_sampler_index + p.sampler_name = sd_samplers.samplers[preview_sampler_index].name p.cfg_scale = preview_cfg_scale p.seed = preview_seed p.width = preview_width -- cgit v1.2.3 From bd68e35de3b7cf7547ed97d8bdf60147402133cc Mon Sep 17 00:00:00 2001 From: flamelaw Date: Sun, 20 Nov 2022 12:35:26 +0900 Subject: Gradient accumulation, autocast fix, new latent sampling method, etc --- modules/hypernetworks/hypernetwork.py | 269 ++++++++++++++++++---------------- 1 file changed, 146 insertions(+), 123 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index fbb87dd1..3d3301b0 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -367,13 +367,13 @@ def report_statistics(loss_info:dict): -def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images save_hypernetwork_every = save_hypernetwork_every or 0 create_image_every = create_image_every or 0 - textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, data_root, template_file, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork") + textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork") path = shared.hypernetworks.get(hypernetwork_name, None) shared.loaded_hypernetwork = Hypernetwork() @@ -403,28 +403,24 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log hypernetwork = shared.loaded_hypernetwork checkpoint = sd_models.select_checkpoint() - ititial_step = hypernetwork.step or 0 - if ititial_step >= steps: + initial_step = hypernetwork.step or 0 + if initial_step >= steps: shared.state.textinfo = f"Model has already been trained beyond specified max steps" return hypernetwork, filename - scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - + scheduler = LearnRateScheduler(learn_rate, steps, initial_step) + # dataset loading may take a while, so input validations and early returns should be done before this shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." - with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size) + + pin_memory = shared.opts.pin_memory + + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method) + dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, batch_size=ds.batch_size, pin_memory=pin_memory) if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) - - size = len(ds.indexes) - loss_dict = defaultdict(lambda : deque(maxlen = 1024)) - losses = torch.zeros((size,)) - previous_mean_losses = [0] - previous_mean_loss = 0 - print("Mean loss of {} elements".format(size)) weights = hypernetwork.weights() for weight in weights: @@ -436,8 +432,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer_name = hypernetwork.optimizer_name else: print(f"Optimizer type {hypernetwork.optimizer_name} is not defined!") - optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) - optimizer_name = 'AdamW' + optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) + optimizer_name = 'AdamW' if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer. try: @@ -446,131 +442,155 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log print("Cannot resume from saved optimizer!") print(e) + scaler = torch.cuda.amp.GradScaler() + + batch_size = ds.batch_size + gradient_step = ds.gradient_step + # n steps = batch_size * gradient_step * n image processed + steps_per_epoch = len(ds) // batch_size // gradient_step + max_steps_per_epoch = len(ds) // batch_size - (len(ds) // batch_size) % gradient_step + loss_step = 0 + _loss_step = 0 #internal + # size = len(ds.indexes) + # loss_dict = defaultdict(lambda : deque(maxlen = 1024)) + # losses = torch.zeros((size,)) + # previous_mean_losses = [0] + # previous_mean_loss = 0 + # print("Mean loss of {} elements".format(size)) + steps_without_grad = 0 last_saved_file = "" last_saved_image = "" forced_filename = "" - pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - for i, entries in pbar: - hypernetwork.step = i + ititial_step - if len(loss_dict) > 0: - previous_mean_losses = [i[-1] for i in loss_dict.values()] - previous_mean_loss = mean(previous_mean_losses) - - scheduler.apply(optimizer, hypernetwork.step) - if scheduler.finished: - break - - if shared.state.interrupted: - break - - with torch.autocast("cuda"): - c = stack_conds([entry.cond for entry in entries]).to(devices.device) - # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) - x = torch.stack([entry.latent for entry in entries]).to(devices.device) - loss = shared.sd_model(x, c)[0] - del x - del c - - losses[hypernetwork.step % losses.shape[0]] = loss.item() - for entry in entries: - loss_dict[entry.filename].append(loss.item()) + pbar = tqdm.tqdm(total=steps - initial_step) + try: + for i in range((steps-initial_step) * gradient_step): + if scheduler.finished: + break + if shared.state.interrupted: + break + for j, batch in enumerate(dl): + # works as a drop_last=True for gradient accumulation + if j == max_steps_per_epoch: + break + scheduler.apply(optimizer, hypernetwork.step) + if scheduler.finished: + break + if shared.state.interrupted: + break + + with torch.autocast("cuda"): + x = batch.latent_sample.to(devices.device, non_blocking=pin_memory) + if tag_drop_out != 0 or shuffle_tags: + shared.sd_model.cond_stage_model.to(devices.device) + c = shared.sd_model.cond_stage_model(batch.cond_text).to(devices.device, non_blocking=pin_memory) + shared.sd_model.cond_stage_model.to(devices.cpu) + else: + c = stack_conds(batch.cond).to(devices.device, non_blocking=pin_memory) + loss = shared.sd_model(x, c)[0] / gradient_step + del x + del c + + _loss_step += loss.item() + scaler.scale(loss).backward() + # go back until we reach gradient accumulation steps + if (j + 1) % gradient_step != 0: + continue + # print(f"grad:{weights[0].grad.detach().cpu().abs().mean().item():.7f}") + # scaler.unscale_(optimizer) + # print(f"grad:{weights[0].grad.detach().cpu().abs().mean().item():.15f}") + # torch.nn.utils.clip_grad_norm_(weights, max_norm=1.0) + # print(f"grad:{weights[0].grad.detach().cpu().abs().mean().item():.15f}") + scaler.step(optimizer) + scaler.update() + hypernetwork.step += 1 + pbar.update() + optimizer.zero_grad(set_to_none=True) + loss_step = _loss_step + _loss_step = 0 + + steps_done = hypernetwork.step + 1 - optimizer.zero_grad() - weights[0].grad = None - loss.backward() - - if weights[0].grad is None: - steps_without_grad += 1 - else: - steps_without_grad = 0 - assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' - - optimizer.step() - - steps_done = hypernetwork.step + 1 - - if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): - raise RuntimeError("Loss diverged.") - - if len(previous_mean_losses) > 1: - std = stdev(previous_mean_losses) - else: - std = 0 - dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" - pbar.set_description(dataset_loss_info) - - if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: - # Before saving, change name to match current checkpoint. - hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}' - last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt') - hypernetwork.optimizer_name = optimizer_name - if shared.opts.save_optimizer_state: - hypernetwork.optimizer_state_dict = optimizer.state_dict() - save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file) - hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. - - textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { - "loss": f"{previous_mean_loss:.7f}", - "learn_rate": scheduler.learn_rate - }) - - if images_dir is not None and steps_done % create_image_every == 0: - forced_filename = f'{hypernetwork_name}-{steps_done}' - last_saved_image = os.path.join(images_dir, forced_filename) - - optimizer.zero_grad() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) - - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_name = sd_samplers.samplers[preview_sampler_index].name - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 + epoch_num = hypernetwork.step // steps_per_epoch + epoch_step = hypernetwork.step % steps_per_epoch + + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step+1}/{steps_per_epoch}]loss: {loss_step:.7f}") + if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: + # Before saving, change name to match current checkpoint. + hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt') + hypernetwork.optimizer_name = optimizer_name + if shared.opts.save_optimizer_state: + hypernetwork.optimizer_state_dict = optimizer.state_dict() + save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file) + hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. + + textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, steps_per_epoch, { + "loss": f"{loss_step:.7f}", + "learn_rate": scheduler.learn_rate + }) + + if images_dir is not None and steps_done % create_image_every == 0: + forced_filename = f'{hypernetwork_name}-{steps_done}' + last_saved_image = os.path.join(images_dir, forced_filename) + + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_name = sd_samplers.samplers[preview_sampler_index].name + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = batch.cond_text[0] + p.steps = 20 + p.width = training_width + p.height = training_height - preview_text = p.prompt + preview_text = p.prompt - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images) > 0 else None - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) - if image is not None: - shared.state.current_image = image - last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) - last_saved_image += f", prompt: {preview_text}" + if image is not None: + shared.state.current_image = image + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) + last_saved_image += f", prompt: {preview_text}" - shared.state.job_no = hypernetwork.step + shared.state.job_no = hypernetwork.step - shared.state.textinfo = f""" + shared.state.textinfo = f"""

-Loss: {previous_mean_loss:.7f}
+Loss: {loss_step:.7f}
Step: {hypernetwork.step}
-Last prompt: {html.escape(entries[0].cond_text)}
+Last prompt: {html.escape(batch.cond_text[0])}
Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" - - report_statistics(loss_dict) + except Exception: + print(traceback.format_exc(), file=sys.stderr) + finally: + pbar.leave = False + pbar.close() + #report_statistics(loss_dict) filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') hypernetwork.optimizer_name = optimizer_name @@ -579,6 +599,9 @@ Last saved image: {html.escape(last_saved_image)}
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename) del optimizer hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + return hypernetwork, filename def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename): -- cgit v1.2.3 From 5b57f61ba47f8b11d19a5b46e7fb5a52458abae5 Mon Sep 17 00:00:00 2001 From: flamelaw Date: Mon, 21 Nov 2022 10:15:46 +0900 Subject: fix pin_memory with different latent sampling method --- modules/hypernetworks/hypernetwork.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3d3301b0..0128419b 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -416,7 +416,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, pin_memory = shared.opts.pin_memory ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method) - dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, batch_size=ds.batch_size, pin_memory=pin_memory) + + latent_sampling_method = ds.latent_sampling_method + + dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory) if unload: shared.sd_model.cond_stage_model.to(devices.cpu) -- cgit v1.2.3 From 89d8ecff09b426ddc89eb5b432825f8f4c218051 Mon Sep 17 00:00:00 2001 From: flamelaw Date: Wed, 23 Nov 2022 02:49:01 +0900 Subject: small fixes --- modules/hypernetworks/hypernetwork.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 0128419b..4541af18 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -435,8 +435,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, optimizer_name = hypernetwork.optimizer_name else: print(f"Optimizer type {hypernetwork.optimizer_name} is not defined!") - optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) - optimizer_name = 'AdamW' + optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) + optimizer_name = 'AdamW' if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer. try: @@ -582,7 +582,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, shared.state.textinfo = f"""

Loss: {loss_step:.7f}
-Step: {hypernetwork.step}
+Step: {steps_done}
Last prompt: {html.escape(batch.cond_text[0])}
Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
-- cgit v1.2.3 From d2c97fc3fe5857d6fba9ad1695ed3ac6ec455ca9 Mon Sep 17 00:00:00 2001 From: flamelaw Date: Wed, 23 Nov 2022 20:00:00 +0900 Subject: fix dropout, implement train/eval mode --- modules/hypernetworks/hypernetwork.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 4541af18..9388959f 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -154,16 +154,28 @@ class Hypernetwork: HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), ) + self.eval_mode() def weights(self): res = [] + for k, layers in self.layers.items(): + for layer in layers: + res += layer.parameters() + return res + def train_mode(self): for k, layers in self.layers.items(): for layer in layers: layer.train() - res += layer.trainables() + for param in layer.parameters(): + param.requires_grad = True - return res + def eval_mode(self): + for k, layers in self.layers.items(): + for layer in layers: + layer.eval() + for param in layer.parameters(): + param.requires_grad = False def save(self, filename): state_dict = {} @@ -426,8 +438,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, shared.sd_model.first_stage_model.to(devices.cpu) weights = hypernetwork.weights() - for weight in weights: - weight.requires_grad = True + hypernetwork.train_mode() # Here we use optimizer from saved HN, or we can specify as UI option. if hypernetwork.optimizer_name in optimizer_dict: @@ -538,7 +549,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, if images_dir is not None and steps_done % create_image_every == 0: forced_filename = f'{hypernetwork_name}-{steps_done}' last_saved_image = os.path.join(images_dir, forced_filename) - + hypernetwork.eval_mode() shared.sd_model.cond_stage_model.to(devices.device) shared.sd_model.first_stage_model.to(devices.device) @@ -571,7 +582,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) - + hypernetwork.train_mode() if image is not None: shared.state.current_image = image last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) @@ -593,6 +604,7 @@ Last saved image: {html.escape(last_saved_image)}
finally: pbar.leave = False pbar.close() + hypernetwork.eval_mode() #report_statistics(loss_dict) filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') -- cgit v1.2.3 From 1bd57cc9791e2e742f72a3d74d589f2c289e8e92 Mon Sep 17 00:00:00 2001 From: flamelaw Date: Wed, 23 Nov 2022 20:21:52 +0900 Subject: last_layer_dropout default to False --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 9388959f..8466887f 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -38,7 +38,7 @@ class HypernetworkModule(torch.nn.Module): activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', - add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=True): + add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" -- cgit v1.2.3 From 4d5f1691dda971ec7b461dd880426300fd54ccee Mon Sep 17 00:00:00 2001 From: brkirch Date: Mon, 28 Nov 2022 21:36:35 -0500 Subject: Use devices.autocast instead of torch.autocast --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8466887f..eb5ae372 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -495,7 +495,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, if shared.state.interrupted: break - with torch.autocast("cuda"): + with devices.autocast(): x = batch.latent_sample.to(devices.device, non_blocking=pin_memory) if tag_drop_out != 0 or shuffle_tags: shared.sd_model.cond_stage_model.to(devices.device) -- cgit v1.2.3 From 3bf5591efe9a9f219c6088be322a87adc4f48f95 Mon Sep 17 00:00:00 2001 From: Yuval Aboulafia Date: Sat, 24 Dec 2022 21:35:29 +0200 Subject: fix F541 f-string without any placeholders --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index c406ffb3..9d3034ae 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -277,7 +277,7 @@ def load_hypernetwork(filename): print(traceback.format_exc(), file=sys.stderr) else: if shared.loaded_hypernetwork is not None: - print(f"Unloading hypernetwork") + print("Unloading hypernetwork") shared.loaded_hypernetwork = None @@ -417,7 +417,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, initial_step = hypernetwork.step or 0 if initial_step >= steps: - shared.state.textinfo = f"Model has already been trained beyond specified max steps" + shared.state.textinfo = "Model has already been trained beyond specified max steps" return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, initial_step) -- cgit v1.2.3 From 5f1dfbbc959855fd90ba80c0c76301d2063772fa Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 24 Dec 2022 18:02:22 -0500 Subject: implement train api --- modules/hypernetworks/hypernetwork.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index c406ffb3..3182ff03 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -378,6 +378,32 @@ def report_statistics(loss_info:dict): print(e) +def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False): + # Remove illegal characters from name. + name = "".join( x for x in name if (x.isalnum() or x in "._- ")) + + fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") + if not overwrite_old: + assert not os.path.exists(fn), f"file {fn} already exists" + + if type(layer_structure) == str: + layer_structure = [float(x.strip()) for x in layer_structure.split(",")] + + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork( + name=name, + enable_sizes=[int(x) for x in enable_sizes], + layer_structure=layer_structure, + activation_func=activation_func, + weight_init=weight_init, + add_layer_norm=add_layer_norm, + use_dropout=use_dropout, + ) + hypernet.save(fn) + + shared.reload_hypernetworks() + + return fn + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. -- cgit v1.2.3 From 192ddc04d6de0d780f73aa5fbaa8c66cd4642e1c Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Tue, 3 Jan 2023 10:34:51 -0500 Subject: add job info to modules --- modules/hypernetworks/hypernetwork.py | 1 + 1 file changed, 1 insertion(+) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 109e8078..450fecac 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -417,6 +417,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, shared.loaded_hypernetwork = Hypernetwork() shared.loaded_hypernetwork.load(path) + shared.state.job = "train-hypernetwork" shared.state.textinfo = "Initializing hypernetwork training..." shared.state.job_count = steps -- cgit v1.2.3 From b85c2b5cf4a6809bc871718cf4680d49c3e95e94 Mon Sep 17 00:00:00 2001 From: timntorres Date: Thu, 5 Jan 2023 08:14:38 -0800 Subject: Clean up ti, add same behavior to hypernetwork. --- modules/hypernetworks/hypernetwork.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 6a9b1398..d5985263 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -401,7 +401,33 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, hypernet.save(fn) shared.reload_hypernetworks() +# Note: textual_inversion.py has a nearly identical function of the same name. +def save_settings_to_file(initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): + checkpoint = sd_models.select_checkpoint() + model_name = checkpoint.model_name + model_hash = '[{}]'.format(checkpoint.hash) + # Starting index of preview-related arguments. + border_index = 19 + + # Get a list of the argument names, excluding default argument. + sig = inspect.signature(save_settings_to_file) + arg_names = [p.name for p in sig.parameters.values() if p.default == p.empty] + + # Create a list of the argument names to include in the settings string. + names = arg_names[:border_index] # Include all arguments up until the preview-related ones. + + # Include preview-related arguments if applicable. + if preview_from_txt2img: + names.extend(arg_names[border_index:]) + + # Build the settings string. + settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n" + for name in names: + value = locals()[name] + settings_str += f"{name}: {value}\n" + with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout: + fout.write(settings_str + "\n\n") def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. @@ -457,7 +483,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, pin_memory = shared.opts.pin_memory ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method) - + + if shared.opts.save_training_settings_to_txt: + save_settings_to_file(initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height) + latent_sampling_method = ds.latent_sampling_method dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory) -- cgit v1.2.3 From b6bab2f052b32c0ffebe6aecc1819ccf20cf8c5d Mon Sep 17 00:00:00 2001 From: timntorres Date: Thu, 5 Jan 2023 09:14:56 -0800 Subject: Include model in log file. Exclude directory. --- modules/hypernetworks/hypernetwork.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index d5985263..3237c37a 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -402,30 +402,22 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, shared.reload_hypernetworks() # Note: textual_inversion.py has a nearly identical function of the same name. -def save_settings_to_file(initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): - checkpoint = sd_models.select_checkpoint() - model_name = checkpoint.model_name - model_hash = '[{}]'.format(checkpoint.hash) +def save_settings_to_file(model_name, model_hash, initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # Starting index of preview-related arguments. - border_index = 19 - - # Get a list of the argument names, excluding default argument. - sig = inspect.signature(save_settings_to_file) - arg_names = [p.name for p in sig.parameters.values() if p.default == p.empty] - + border_index = 21 + # Get a list of the argument names. + arg_names = inspect.getfullargspec(save_settings_to_file).args # Create a list of the argument names to include in the settings string. names = arg_names[:border_index] # Include all arguments up until the preview-related ones. - - # Include preview-related arguments if applicable. if preview_from_txt2img: - names.extend(arg_names[border_index:]) - + names.extend(arg_names[border_index:]) # Include preview-related arguments if applicable. # Build the settings string. settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n" for name in names: - value = locals()[name] - settings_str += f"{name}: {value}\n" - + if name != 'log_directory': # It's useless and redundant to save log_directory. + value = locals()[name] + settings_str += f"{name}: {value}\n" + # Create or append to the file. with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout: fout.write(settings_str + "\n\n") @@ -485,7 +477,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method) if shared.opts.save_training_settings_to_txt: - save_settings_to_file(initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height) + save_settings_to_file(checkpoint.model_name, '[{}]'.format(checkpoint.hash), initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height) latent_sampling_method = ds.latent_sampling_method -- cgit v1.2.3 From 683287d87f6401083a8d63eedc00ca7410214ca1 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 6 Jan 2023 08:52:06 +0300 Subject: rework saving training params to file #6372 --- modules/hypernetworks/hypernetwork.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3237c37a..b0cfbe71 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -13,7 +13,7 @@ import tqdm from einops import rearrange, repeat from ldm.util import default from modules import devices, processing, sd_models, shared, sd_samplers -from modules.textual_inversion import textual_inversion +from modules.textual_inversion import textual_inversion, logging from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_ @@ -401,25 +401,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, hypernet.save(fn) shared.reload_hypernetworks() -# Note: textual_inversion.py has a nearly identical function of the same name. -def save_settings_to_file(model_name, model_hash, initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): - # Starting index of preview-related arguments. - border_index = 21 - # Get a list of the argument names. - arg_names = inspect.getfullargspec(save_settings_to_file).args - # Create a list of the argument names to include in the settings string. - names = arg_names[:border_index] # Include all arguments up until the preview-related ones. - if preview_from_txt2img: - names.extend(arg_names[border_index:]) # Include preview-related arguments if applicable. - # Build the settings string. - settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n" - for name in names: - if name != 'log_directory': # It's useless and redundant to save log_directory. - value = locals()[name] - settings_str += f"{name}: {value}\n" - # Create or append to the file. - with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout: - fout.write(settings_str + "\n\n") + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. @@ -477,7 +459,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method) if shared.opts.save_training_settings_to_txt: - save_settings_to_file(checkpoint.model_name, '[{}]'.format(checkpoint.hash), initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height) + saved_params = dict( + model_name=checkpoint.model_name, model_hash=checkpoint.hash, num_of_dataset_images=len(ds), + **{field: getattr(hypernetwork, field) for field in ['layer_structure', 'activation_func', 'weight_init', 'add_layer_norm', 'use_dropout', ]} + ) + logging.save_settings_to_file(log_directory, {**saved_params, **locals()}) latent_sampling_method = ds.latent_sampling_method -- cgit v1.2.3 From 669fb18d5222f53ae48abe0f30393d846c50ad91 Mon Sep 17 00:00:00 2001 From: dan Date: Sun, 8 Jan 2023 01:34:52 +0800 Subject: Add checkbox for variable training dims --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b0cfbe71..dba52841 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -403,7 +403,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, shared.reload_hypernetworks() -def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images -- cgit v1.2.3 From 72497895b9b1948f86d9309fe897cbb70c20ba7e Mon Sep 17 00:00:00 2001 From: dan Date: Sun, 8 Jan 2023 01:36:00 +0800 Subject: Move batchsize check --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index dba52841..32c67ccc 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -456,7 +456,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, pin_memory = shared.opts.pin_memory - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method, varsize=varsize) if shared.opts.save_training_settings_to_txt: saved_params = dict( -- cgit v1.2.3 From 1fbb6f9ebe48326a3b12ecf611105dbc4a46891e Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Mon, 9 Jan 2023 23:35:40 +0300 Subject: make a dropdown for prompt template selection --- modules/hypernetworks/hypernetwork.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 32c67ccc..ea3f1db9 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -24,6 +24,7 @@ from statistics import stdev, mean optimizer_dict = {optim_name : cls_obj for optim_name, cls_obj in inspect.getmembers(torch.optim, inspect.isclass) if optim_name != "Optimizer"} + class HypernetworkModule(torch.nn.Module): multiplier = 1.0 activation_dict = { @@ -403,13 +404,15 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, shared.reload_hypernetworks() -def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_filename, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images save_hypernetwork_every = save_hypernetwork_every or 0 create_image_every = create_image_every or 0 - textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork") + template_file = textual_inversion.textual_inversion_templates.get(template_filename, None) + textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, template_file, template_filename, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork") + template_file = template_file.path path = shared.hypernetworks.get(hypernetwork_name, None) shared.loaded_hypernetwork = Hypernetwork() -- cgit v1.2.3 From a4a5475cfa3c68af6cb046081002a72f862ce4be Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Tue, 10 Jan 2023 14:56:57 +0900 Subject: Variable dropout rate Implements variable dropout rate from #4549 Fixes hypernetwork multiplier being able to modified during training, also fixes user-errors by setting multiplier value to lower values for training. Changes function name to match torch.nn.module standard Fixes RNG reset issue when generating previews by restoring RNG state --- modules/hypernetworks/hypernetwork.py | 101 +++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 25 deletions(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index ea3f1db9..300d3975 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -39,7 +39,7 @@ class HypernetworkModule(torch.nn.Module): activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', - add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=False): + add_layer_norm=False, activate_output=False, dropout_structure=None): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -64,9 +64,12 @@ class HypernetworkModule(torch.nn.Module): if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - # Add dropout except last layer - if use_dropout and (i < len(layer_structure) - 3 or last_layer_dropout and i < len(layer_structure) - 2): - linears.append(torch.nn.Dropout(p=0.3)) + # Everything should be now parsed into dropout structure, and applied here. + # Since we only have dropouts after layers, dropout structure should start with 0 and end with 0. + if dropout_structure is not None and dropout_structure[i+1] > 0: + assert 0 < dropout_structure[i+1] < 1, "Dropout probability should be 0 or float between 0 and 1!" + linears.append(torch.nn.Dropout(p=dropout_structure[i+1])) + # Code explanation : [1, 2, 1] -> dropout is missing when last_layer_dropout is false. [1, 2, 2, 1] -> [0, 0.3, 0, 0], when its True, [0, 0.3, 0.3, 0]. self.linear = torch.nn.Sequential(*linears) @@ -113,7 +116,7 @@ class HypernetworkModule(torch.nn.Module): state_dict[to] = x def forward(self, x): - return x + self.linear(x) * self.multiplier + return x + self.linear(x) * (HypernetworkModule.multiplier if not self.training else 1) def trainables(self): layer_structure = [] @@ -126,6 +129,21 @@ class HypernetworkModule(torch.nn.Module): def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength +#param layer_structure : sequence used for length, use_dropout : controlling boolean, last_layer_dropout : for compatibility check. +def parse_dropout_structure(layer_structure, use_dropout, last_layer_dropout): + if layer_structure is None: + layer_structure = [1, 2, 1] + if not use_dropout: + return [0] * len(layer_structure) + dropout_values = [0] + dropout_values.extend([0.3] * (len(layer_structure) - 3)) + if last_layer_dropout: + dropout_values.append(0.3) + else: + dropout_values.append(0) + dropout_values.append(0) + return dropout_values + class Hypernetwork: filename = None @@ -144,18 +162,22 @@ class Hypernetwork: self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout self.activate_output = activate_output - self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True + self.last_layer_dropout = kwargs.get('last_layer_dropout', True) + self.dropout_structure = kwargs.get('dropout_structure', None) + if self.dropout_structure is None: + self.dropout_structure = parse_dropout_structure(self.layer_structure, self.use_dropout, self.last_layer_dropout) self.optimizer_name = None self.optimizer_state_dict = None + self.optional_info = None for size in enable_sizes or []: self.layers[size] = ( HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure), HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure), ) - self.eval_mode() + self.eval() def weights(self): res = [] @@ -164,14 +186,14 @@ class Hypernetwork: res += layer.parameters() return res - def train_mode(self): + def train(self, mode=True): for k, layers in self.layers.items(): for layer in layers: - layer.train() + layer.train(mode=mode) for param in layer.parameters(): - param.requires_grad = True + param.requires_grad = mode - def eval_mode(self): + def eval(self): for k, layers in self.layers.items(): for layer in layers: layer.eval() @@ -191,11 +213,13 @@ class Hypernetwork: state_dict['activation_func'] = self.activation_func state_dict['is_layer_norm'] = self.add_layer_norm state_dict['weight_initialization'] = self.weight_init - state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name state_dict['activate_output'] = self.activate_output - state_dict['last_layer_dropout'] = self.last_layer_dropout + state_dict['use_dropout'] = self.use_dropout + state_dict['dropout_structure'] = self.dropout_structure + state_dict['last_layer_dropout'] = (self.dropout_structure[-2] != 0) if self.dropout_structure is not None else self.last_layer_dropout + state_dict['optional_info'] = self.optional_info if self.optional_info else None if self.optimizer_name is not None: optimizer_saved_dict['optimizer_name'] = self.optimizer_name @@ -215,43 +239,56 @@ class Hypernetwork: self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) print(self.layer_structure) + optional_info = state_dict.get('optional_info', None) + if optional_info is not None: + print(f"INFO:\n {optional_info}\n") + self.optional_info = optional_info self.activation_func = state_dict.get('activation_func', None) print(f"Activation function is {self.activation_func}") self.weight_init = state_dict.get('weight_initialization', 'Normal') print(f"Weight initialization is {self.weight_init}") self.add_layer_norm = state_dict.get('is_layer_norm', False) print(f"Layer norm is set to {self.add_layer_norm}") - self.use_dropout = state_dict.get('use_dropout', False) + self.dropout_structure = state_dict.get('dropout_structure', None) + self.use_dropout = True if self.dropout_structure is not None and any(self.dropout_structure) else state_dict.get('use_dropout', False) print(f"Dropout usage is set to {self.use_dropout}" ) self.activate_output = state_dict.get('activate_output', True) print(f"Activate last layer is set to {self.activate_output}") self.last_layer_dropout = state_dict.get('last_layer_dropout', False) + # Dropout structure should have same length as layer structure, Every digits should be in [0,1), and last digit must be 0. + if self.dropout_structure is None: + print("Using previous dropout structure") + self.dropout_structure = parse_dropout_structure(self.layer_structure, self.use_dropout, self.last_layer_dropout) + print(f"Dropout structure is set to {self.dropout_structure}") optimizer_saved_dict = torch.load(self.filename + '.optim', map_location = 'cpu') if os.path.exists(self.filename + '.optim') else {} - self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW') - print(f"Optimizer name is {self.optimizer_name}") + if sd_models.model_hash(filename) == optimizer_saved_dict.get('hash', None): self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None) else: self.optimizer_state_dict = None if self.optimizer_state_dict: + self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW') print("Loaded existing optimizer from checkpoint") + print(f"Optimizer name is {self.optimizer_name}") else: + self.optimizer_name = "AdamW" print("No saved optimizer exists in checkpoint") for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + self.add_layer_norm, self.activate_output, self.dropout_structure), HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, - self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + self.add_layer_norm, self.activate_output, self.dropout_structure), ) self.name = state_dict.get('name', self.name) self.step = state_dict.get('step', 0) self.sd_checkpoint = state_dict.get('sd_checkpoint', None) self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) + self.eval() def list_hypernetworks(path): @@ -379,9 +416,10 @@ def report_statistics(loss_info:dict): print(e) -def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False): +def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None): # Remove illegal characters from name. name = "".join( x for x in name if (x.isalnum() or x in "._- ")) + assert name, "Name cannot be empty!" fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") if not overwrite_old: @@ -390,6 +428,11 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, if type(layer_structure) == str: layer_structure = [float(x.strip()) for x in layer_structure.split(",")] + if use_dropout and dropout_structure and type(dropout_structure) == str: + dropout_structure = [float(x.strip()) for x in dropout_structure.split(",")] + else: + dropout_structure = [0] * len(layer_structure) + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork( name=name, enable_sizes=[int(x) for x in enable_sizes], @@ -398,6 +441,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, weight_init=weight_init, add_layer_norm=add_layer_norm, use_dropout=use_dropout, + dropout_structure=dropout_structure ) hypernet.save(fn) @@ -480,7 +524,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, shared.sd_model.first_stage_model.to(devices.cpu) weights = hypernetwork.weights() - hypernetwork.train_mode() + hypernetwork.train() # Here we use optimizer from saved HN, or we can specify as UI option. if hypernetwork.optimizer_name in optimizer_dict: @@ -594,7 +638,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, if images_dir is not None and steps_done % create_image_every == 0: forced_filename = f'{hypernetwork_name}-{steps_done}' last_saved_image = os.path.join(images_dir, forced_filename) - hypernetwork.eval_mode() + hypernetwork.eval() + rng_state = torch.get_rng_state() + cuda_rng_state = None + if torch.cuda.is_available(): + cuda_rng_state = torch.cuda.get_rng_state_all() shared.sd_model.cond_stage_model.to(devices.device) shared.sd_model.first_stage_model.to(devices.device) @@ -627,7 +675,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) - hypernetwork.train_mode() + torch.set_rng_state(rng_state) + if torch.cuda.is_available(): + torch.cuda.set_rng_state_all(cuda_rng_state) + hypernetwork.train() if image is not None: shared.state.current_image = image last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) @@ -649,7 +700,7 @@ Last saved image: {html.escape(last_saved_image)}
finally: pbar.leave = False pbar.close() - hypernetwork.eval_mode() + hypernetwork.eval() #report_statistics(loss_dict) filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') -- cgit v1.2.3 From 3f43d8a966ba8462ba019a5ad573f94508cd45f8 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 11 Jan 2023 10:28:55 -0500 Subject: set descriptions --- modules/hypernetworks/hypernetwork.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks/hypernetwork.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 300d3975..194679e8 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -619,7 +619,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, epoch_num = hypernetwork.step // steps_per_epoch epoch_step = hypernetwork.step % steps_per_epoch - pbar.set_description(f"[Epoch {epoch_num}: {epoch_step+1}/{steps_per_epoch}]loss: {loss_step:.7f}") + description = f"Training hypernetwork [Epoch {epoch_num}: {epoch_step+1}/{steps_per_epoch}]loss: {loss_step:.7f}" + pbar.set_description(description) + shared.state.textinfo = description if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}' -- cgit v1.2.3