From 2f4c91894d4c0a055c1069b2fda0e4da8fcda188 Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 12:10:30 +0800
Subject: Remove activation from final layer of HNs
---
modules/hypernetworks/hypernetwork.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index d647ea55..54346b64 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -41,8 +41,8 @@ class HypernetworkModule(torch.nn.Module):
# Add a fully-connected layer
linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1])))
- # Add an activation func
- if activation_func == "linear" or activation_func is None:
+ # Add an activation func except last layer
+ if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 3:
pass
elif activation_func in self.activation_dict:
linears.append(self.activation_dict[activation_func]())
@@ -53,7 +53,7 @@ class HypernetworkModule(torch.nn.Module):
if add_layer_norm:
linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
- # Add dropout expect last layer
+ # Add dropout except last layer
if use_dropout and i < len(layer_structure) - 3:
linears.append(torch.nn.Dropout(p=0.3))
--
cgit v1.2.3
From c702d4d0df21790199d199818f25c449213ffe0f Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 13:43:04 +0800
Subject: Fix off-by-one
---
modules/hypernetworks/hypernetwork.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 54346b64..3ce85bb5 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -42,7 +42,7 @@ class HypernetworkModule(torch.nn.Module):
linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1])))
# Add an activation func except last layer
- if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 3:
+ if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 2:
pass
elif activation_func in self.activation_dict:
linears.append(self.activation_dict[activation_func]())
@@ -54,7 +54,7 @@ class HypernetworkModule(torch.nn.Module):
linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
# Add dropout except last layer
- if use_dropout and i < len(layer_structure) - 3:
+ if use_dropout and i < len(layer_structure) - 2:
linears.append(torch.nn.Dropout(p=0.3))
self.linear = torch.nn.Sequential(*linears)
--
cgit v1.2.3
From 877d94f97ca5491d8779440769b191e0dcd32c8e Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 14:50:58 +0800
Subject: Back compatibility
---
modules/hypernetworks/hypernetwork.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 3ce85bb5..dd317085 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -28,7 +28,7 @@ class HypernetworkModule(torch.nn.Module):
"swish": torch.nn.Hardswish,
}
- def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False):
+ def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False, activate_output=False):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
@@ -42,7 +42,7 @@ class HypernetworkModule(torch.nn.Module):
linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1])))
# Add an activation func except last layer
- if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 2:
+ if activation_func == "linear" or activation_func is None or (i >= len(layer_structure) - 2 and not activate_output):
pass
elif activation_func in self.activation_dict:
linears.append(self.activation_dict[activation_func]())
@@ -105,7 +105,7 @@ class Hypernetwork:
filename = None
name = None
- def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False):
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False, activate_output=False):
self.filename = None
self.name = name
self.layers = {}
@@ -116,11 +116,12 @@ class Hypernetwork:
self.activation_func = activation_func
self.add_layer_norm = add_layer_norm
self.use_dropout = use_dropout
+ self.activate_output = activate_output
for size in enable_sizes or []:
self.layers[size] = (
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output),
)
def weights(self):
@@ -147,6 +148,7 @@ class Hypernetwork:
state_dict['use_dropout'] = self.use_dropout
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
+ state_dict['activate_output'] = self.activate_output
torch.save(state_dict, filename)
@@ -161,12 +163,13 @@ class Hypernetwork:
self.activation_func = state_dict.get('activation_func', None)
self.add_layer_norm = state_dict.get('is_layer_norm', False)
self.use_dropout = state_dict.get('use_dropout', False)
+ self.activate_output = state_dict.get('activate_output', True)
for size, sd in state_dict.items():
if type(size) == int:
self.layers[size] = (
- HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output),
+ HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output),
)
self.name = state_dict.get('name', self.name)
--
cgit v1.2.3
From 91bb35b1e6842b30ce7553009c8ecea3643de8d2 Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 15:00:03 +0800
Subject: Merge fix
---
modules/hypernetworks/hypernetwork.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index eab8b32f..bd171793 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -190,7 +190,7 @@ class Hypernetwork:
print(f"Weight initialization is {self.weight_init}")
self.add_layer_norm = state_dict.get('is_layer_norm', False)
print(f"Layer norm is set to {self.add_layer_norm}")
- self.use_dropout = state_dict.get('use_dropout', False
+ self.use_dropout = state_dict.get('use_dropout', False)
print(f"Dropout usage is set to {self.use_dropout}" )
self.activate_output = state_dict.get('activate_output', True)
--
cgit v1.2.3
From b6a8bb123bd519736306417399f6441e504f1e8b Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 15:15:19 +0800
Subject: Fix merge
---
modules/hypernetworks/hypernetwork.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index bd171793..2997cead 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -60,7 +60,7 @@ class HypernetworkModule(torch.nn.Module):
linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
# Add dropout except last layer
- if use_dropout and i < len(layer_structure) - 2:
+ if use_dropout and i < len(layer_structure) - 3:
linears.append(torch.nn.Dropout(p=0.3))
self.linear = torch.nn.Sequential(*linears)
@@ -126,7 +126,7 @@ class Hypernetwork:
filename = None
name = None
- def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False)
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False):
self.filename = None
self.name = name
self.layers = {}
--
cgit v1.2.3
From 85fcccc105aa50f1d78de559233eaa9f384608b5 Mon Sep 17 00:00:00 2001
From: AngelBottomless <35677394+aria1th@users.noreply.github.com>
Date: Wed, 26 Oct 2022 22:24:33 +0900
Subject: Squashed commit of fixing dropout silently
fix dropouts for future hypernetworks
add kwargs for Hypernetwork class
hypernet UI for gradio input
add recommended options
remove as options
revert adding options in ui
---
modules/hypernetworks/hypernetwork.py | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 2997cead..dd921153 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -34,7 +34,8 @@ class HypernetworkModule(torch.nn.Module):
}
activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
- def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False, activate_output=False):
+ def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
+ add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
@@ -60,7 +61,7 @@ class HypernetworkModule(torch.nn.Module):
linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
# Add dropout except last layer
- if use_dropout and i < len(layer_structure) - 3:
+ if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2:
linears.append(torch.nn.Dropout(p=0.3))
self.linear = torch.nn.Sequential(*linears)
@@ -126,7 +127,7 @@ class Hypernetwork:
filename = None
name = None
- def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False):
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs):
self.filename = None
self.name = name
self.layers = {}
@@ -139,11 +140,14 @@ class Hypernetwork:
self.add_layer_norm = add_layer_norm
self.use_dropout = use_dropout
self.activate_output = activate_output
+ self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True
for size in enable_sizes or []:
self.layers[size] = (
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
)
def weights(self):
@@ -172,7 +176,8 @@ class Hypernetwork:
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
state_dict['activate_output'] = self.activate_output
-
+ state_dict['last_layer_dropout'] = self.last_layer_dropout
+
torch.save(state_dict, filename)
def load(self, filename):
@@ -193,12 +198,16 @@ class Hypernetwork:
self.use_dropout = state_dict.get('use_dropout', False)
print(f"Dropout usage is set to {self.use_dropout}" )
self.activate_output = state_dict.get('activate_output', True)
+ print(f"Activate last layer is set to {self.activate_output}")
+ self.last_layer_dropout = state_dict.get('last_layer_dropout', False)
for size, sd in state_dict.items():
if type(size) == int:
self.layers[size] = (
- HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
- HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
+ HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
+ HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
)
self.name = state_dict.get('name', self.name)
--
cgit v1.2.3
From cc56df996e95c2c82295ab7b9928da2544791220 Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 23:51:51 +0800
Subject: Fix dropout logic
---
modules/hypernetworks/hypernetwork.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index dd921153..b17598fe 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -35,7 +35,7 @@ class HypernetworkModule(torch.nn.Module):
activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
- add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs):
+ add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=True):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
@@ -61,7 +61,7 @@ class HypernetworkModule(torch.nn.Module):
linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
# Add dropout except last layer
- if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2:
+ if use_dropout and (i < len(layer_structure) - 3 or last_layer_dropout and i < len(layer_structure) - 2):
linears.append(torch.nn.Dropout(p=0.3))
self.linear = torch.nn.Sequential(*linears)
--
cgit v1.2.3
From 029d7c75436558f1e884bb127caed73caaecb83a Mon Sep 17 00:00:00 2001
From: AngelBottomless <35677394+aria1th@users.noreply.github.com>
Date: Thu, 27 Oct 2022 14:44:53 +0900
Subject: Revert unresolved changes in Bias initialization
it should be zeros_ or parameterized in future properly.
---
modules/hypernetworks/hypernetwork.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index b17598fe..25427a37 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -75,7 +75,7 @@ class HypernetworkModule(torch.nn.Module):
w, b = layer.weight.data, layer.bias.data
if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm:
normal_(w, mean=0.0, std=0.01)
- normal_(b, mean=0.0, std=0.005)
+ normal_(b, mean=0.0, std=0)
elif weight_init == 'XavierUniform':
xavier_uniform_(w)
zeros_(b)
--
cgit v1.2.3
From 20194fd9752a280306fb66b57b258609b0918c46 Mon Sep 17 00:00:00 2001
From: AngelBottomless <35677394+aria1th@users.noreply.github.com>
Date: Sat, 29 Oct 2022 16:56:42 +0900
Subject: We have duplicate linear now
---
modules/hypernetworks/ui.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py
index aad09ffc..c2d4b51c 100644
--- a/modules/hypernetworks/ui.py
+++ b/modules/hypernetworks/ui.py
@@ -9,7 +9,7 @@ from modules import devices, sd_hijack, shared
from modules.hypernetworks import hypernetwork
not_available = ["hardswish", "multiheadattention"]
-keys = ["linear"] + list(x for x in hypernetwork.HypernetworkModule.activation_dict.keys() if x not in not_available)
+keys = list(x for x in hypernetwork.HypernetworkModule.activation_dict.keys() if x not in not_available)
def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
# Remove illegal characters from name.
--
cgit v1.2.3
From 9d96d7d0a0aa0a966a9aefd24342345eb65952ed Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Sun, 30 Oct 2022 20:39:04 +0900
Subject: resolve conflicts
---
modules/hypernetworks/hypernetwork.py | 44 ++++++++++++++++++++++++++++++-----
1 file changed, 38 insertions(+), 6 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index a11e01d6..8f74cdea 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -21,6 +21,7 @@ from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_norm
from collections import defaultdict, deque
from statistics import stdev, mean
+optimizer_dict = {optim_name : cls_obj for optim_name, cls_obj in inspect.getmembers(torch.optim, inspect.isclass) if optim_name != "Optimizer"}
class HypernetworkModule(torch.nn.Module):
multiplier = 1.0
@@ -139,6 +140,8 @@ class Hypernetwork:
self.weight_init = weight_init
self.add_layer_norm = add_layer_norm
self.use_dropout = use_dropout
+ self.optimizer_name = None
+ self.optimizer_state_dict = None
for size in enable_sizes or []:
self.layers[size] = (
@@ -171,6 +174,10 @@ class Hypernetwork:
state_dict['use_dropout'] = self.use_dropout
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
+ if self.optimizer_name is not None:
+ state_dict['optimizer_name'] = self.optimizer_name
+ if self.optimizer_state_dict:
+ state_dict['optimizer_state_dict'] = self.optimizer_state_dict
torch.save(state_dict, filename)
@@ -190,7 +197,14 @@ class Hypernetwork:
self.add_layer_norm = state_dict.get('is_layer_norm', False)
print(f"Layer norm is set to {self.add_layer_norm}")
self.use_dropout = state_dict.get('use_dropout', False)
- print(f"Dropout usage is set to {self.use_dropout}" )
+ print(f"Dropout usage is set to {self.use_dropout}")
+ self.optimizer_name = state_dict.get('optimizer_name', 'AdamW')
+ print(f"Optimizer name is {self.optimizer_name}")
+ self.optimizer_state_dict = state_dict.get('optimizer_state_dict', None)
+ if self.optimizer_state_dict:
+ print("Loaded existing optimizer from checkpoint")
+ else:
+ print("No saved optimizer exists in checkpoint")
for size, sd in state_dict.items():
if type(size) == int:
@@ -392,8 +406,19 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
weights = hypernetwork.weights()
for weight in weights:
weight.requires_grad = True
- # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc...
- optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
+ # Here we use optimizer from saved HN, or we can specify as UI option.
+ if (optimizer_name := hypernetwork.optimizer_name) in optimizer_dict:
+ optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate)
+ else:
+ print(f"Optimizer type {optimizer_name} is not defined!")
+ optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate)
+ optimizer_name = 'AdamW'
+ if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer.
+ try:
+ optimizer.load_state_dict(hypernetwork.optimizer_state_dict)
+ except RuntimeError as e:
+ print("Cannot resume from saved optimizer!")
+ print(e)
steps_without_grad = 0
@@ -455,8 +480,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
# Before saving, change name to match current checkpoint.
hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}'
last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt')
+ hypernetwork.optimizer_name = optimizer_name
+ if shared.opts.save_optimizer_state:
+ hypernetwork.optimizer_state_dict = optimizer.state_dict()
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file)
-
+ hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory.
textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
"loss": f"{previous_mean_loss:.7f}",
"learn_rate": scheduler.learn_rate
@@ -514,14 +542,18 @@ Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
"""
-
report_statistics(loss_dict)
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
+ hypernetwork.optimizer_name = optimizer_name
+ if shared.opts.save_optimizer_state:
+ hypernetwork.optimizer_state_dict = optimizer.state_dict()
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename)
-
+ del optimizer
+ hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory.
return hypernetwork, filename
+
def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename):
old_hypernetwork_name = hypernetwork.name
old_sd_checkpoint = hypernetwork.sd_checkpoint if hasattr(hypernetwork, "sd_checkpoint") else None
--
cgit v1.2.3
From 0b143c1163a96b193a4e8512be9c5831c661a50d Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Thu, 3 Nov 2022 14:30:53 +0900
Subject: Separate .optim file from model
---
modules/hypernetworks/hypernetwork.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 8f74cdea..63c25de8 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -161,6 +161,7 @@ class Hypernetwork:
def save(self, filename):
state_dict = {}
+ optimizer_saved_dict = {}
for k, v in self.layers.items():
state_dict[k] = (v[0].state_dict(), v[1].state_dict())
@@ -175,9 +176,10 @@ class Hypernetwork:
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
if self.optimizer_name is not None:
- state_dict['optimizer_name'] = self.optimizer_name
+ optimizer_saved_dict['optimizer_name'] = self.optimizer_name
if self.optimizer_state_dict:
- state_dict['optimizer_state_dict'] = self.optimizer_state_dict
+ optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict
+ torch.save(optimizer_saved_dict, filename + '.optim')
torch.save(state_dict, filename)
@@ -198,9 +200,11 @@ class Hypernetwork:
print(f"Layer norm is set to {self.add_layer_norm}")
self.use_dropout = state_dict.get('use_dropout', False)
print(f"Dropout usage is set to {self.use_dropout}")
- self.optimizer_name = state_dict.get('optimizer_name', 'AdamW')
+
+ optimizer_saved_dict = torch.load(self.filename + '.optim', map_location = 'cpu') if os.path.exists(self.filename + '.optim') else {}
+ self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW')
print(f"Optimizer name is {self.optimizer_name}")
- self.optimizer_state_dict = state_dict.get('optimizer_state_dict', None)
+ self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None)
if self.optimizer_state_dict:
print("Loaded existing optimizer from checkpoint")
else:
--
cgit v1.2.3
From 1764ac3c8bc482bd575987850e96630d9115e51a Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Thu, 3 Nov 2022 14:49:26 +0900
Subject: use hash to check valid optim
---
modules/hypernetworks/hypernetwork.py | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 63c25de8..4230b8cf 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -177,12 +177,13 @@ class Hypernetwork:
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
if self.optimizer_name is not None:
optimizer_saved_dict['optimizer_name'] = self.optimizer_name
+
+ torch.save(state_dict, filename)
if self.optimizer_state_dict:
+ optimizer_saved_dict['hash'] = sd_models.model_hash(filename)
optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict
torch.save(optimizer_saved_dict, filename + '.optim')
- torch.save(state_dict, filename)
-
def load(self, filename):
self.filename = filename
if self.name is None:
@@ -204,7 +205,10 @@ class Hypernetwork:
optimizer_saved_dict = torch.load(self.filename + '.optim', map_location = 'cpu') if os.path.exists(self.filename + '.optim') else {}
self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW')
print(f"Optimizer name is {self.optimizer_name}")
- self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None)
+ if sd_models.model_hash(filename) == optimizer_saved_dict.get('hash', None):
+ self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None)
+ else:
+ self.optimizer_state_dict = None
if self.optimizer_state_dict:
print("Loaded existing optimizer from checkpoint")
else:
@@ -229,7 +233,7 @@ def list_hypernetworks(path):
name = os.path.splitext(os.path.basename(filename))[0]
# Prevent a hypothetical "None.pt" from being listed.
if name != "None":
- res[name] = filename
+ res[name + f"({sd_models.model_hash(filename)})"] = filename
return res
@@ -375,6 +379,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
else:
hypernetwork_dir = None
+ hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0]
if create_image_every > 0:
images_dir = os.path.join(log_directory, "images")
os.makedirs(images_dir, exist_ok=True)
--
cgit v1.2.3
From 0abb39f461baa343ae7c23abffb261e57c3168d4 Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Fri, 4 Nov 2022 15:47:19 +0900
Subject: resolve conflict - first revert
---
modules/hypernetworks/hypernetwork.py | 123 ++++++++++++++--------------------
1 file changed, 52 insertions(+), 71 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 4230b8cf..674fcedd 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -21,7 +21,6 @@ from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_norm
from collections import defaultdict, deque
from statistics import stdev, mean
-optimizer_dict = {optim_name : cls_obj for optim_name, cls_obj in inspect.getmembers(torch.optim, inspect.isclass) if optim_name != "Optimizer"}
class HypernetworkModule(torch.nn.Module):
multiplier = 1.0
@@ -34,9 +33,12 @@ class HypernetworkModule(torch.nn.Module):
"tanh": torch.nn.Tanh,
"sigmoid": torch.nn.Sigmoid,
}
- activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
+ activation_dict.update(
+ {cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if
+ inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
- def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False):
+ def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
+ add_layer_norm=False, use_dropout=False):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
@@ -47,7 +49,7 @@ class HypernetworkModule(torch.nn.Module):
for i in range(len(layer_structure) - 1):
# Add a fully-connected layer
- linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1])))
+ linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i + 1])))
# Add an activation func
if activation_func == "linear" or activation_func is None:
@@ -59,7 +61,7 @@ class HypernetworkModule(torch.nn.Module):
# Add layer normalization
if add_layer_norm:
- linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
+ linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i + 1])))
# Add dropout expect last layer
if use_dropout and i < len(layer_structure) - 3:
@@ -128,7 +130,8 @@ class Hypernetwork:
filename = None
name = None
- def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None,
+ add_layer_norm=False, use_dropout=False):
self.filename = None
self.name = name
self.layers = {}
@@ -140,13 +143,13 @@ class Hypernetwork:
self.weight_init = weight_init
self.add_layer_norm = add_layer_norm
self.use_dropout = use_dropout
- self.optimizer_name = None
- self.optimizer_state_dict = None
for size in enable_sizes or []:
self.layers[size] = (
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout),
)
def weights(self):
@@ -161,7 +164,6 @@ class Hypernetwork:
def save(self, filename):
state_dict = {}
- optimizer_saved_dict = {}
for k, v in self.layers.items():
state_dict[k] = (v[0].state_dict(), v[1].state_dict())
@@ -175,14 +177,8 @@ class Hypernetwork:
state_dict['use_dropout'] = self.use_dropout
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
- if self.optimizer_name is not None:
- optimizer_saved_dict['optimizer_name'] = self.optimizer_name
torch.save(state_dict, filename)
- if self.optimizer_state_dict:
- optimizer_saved_dict['hash'] = sd_models.model_hash(filename)
- optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict
- torch.save(optimizer_saved_dict, filename + '.optim')
def load(self, filename):
self.filename = filename
@@ -202,23 +198,13 @@ class Hypernetwork:
self.use_dropout = state_dict.get('use_dropout', False)
print(f"Dropout usage is set to {self.use_dropout}")
- optimizer_saved_dict = torch.load(self.filename + '.optim', map_location = 'cpu') if os.path.exists(self.filename + '.optim') else {}
- self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW')
- print(f"Optimizer name is {self.optimizer_name}")
- if sd_models.model_hash(filename) == optimizer_saved_dict.get('hash', None):
- self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None)
- else:
- self.optimizer_state_dict = None
- if self.optimizer_state_dict:
- print("Loaded existing optimizer from checkpoint")
- else:
- print("No saved optimizer exists in checkpoint")
-
for size, sd in state_dict.items():
if type(size) == int:
self.layers[size] = (
- HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init,
+ self.add_layer_norm, self.use_dropout),
)
self.name = state_dict.get('name', self.name)
@@ -233,7 +219,7 @@ def list_hypernetworks(path):
name = os.path.splitext(os.path.basename(filename))[0]
# Prevent a hypothetical "None.pt" from being listed.
if name != "None":
- res[name + f"({sd_models.model_hash(filename)})"] = filename
+ res[name] = filename
return res
@@ -330,7 +316,7 @@ def statistics(data):
std = 0
else:
std = stdev(data)
- total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std/ (len(data) ** 0.5):.3f})"
+ total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std / (len(data) ** 0.5):.3f})"
recent_data = data[-32:]
if len(recent_data) < 2:
std = 0
@@ -340,7 +326,7 @@ def statistics(data):
return total_information, recent_information
-def report_statistics(loss_info:dict):
+def report_statistics(loss_info: dict):
keys = sorted(loss_info.keys(), key=lambda x: sum(loss_info[x]) / len(loss_info[x]))
for key in keys:
try:
@@ -352,14 +338,18 @@ def report_statistics(loss_info:dict):
print(e)
-
-def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width,
+ training_height, steps, create_image_every, save_hypernetwork_every, template_file,
+ preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps,
+ preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
# images allows training previews to have infotext. Importing it at the top causes a circular import problem.
from modules import images
save_hypernetwork_every = save_hypernetwork_every or 0
create_image_every = create_image_every or 0
- textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, data_root, template_file, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork")
+ textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, data_root, template_file, steps,
+ save_hypernetwork_every, create_image_every, log_directory,
+ name="hypernetwork")
path = shared.hypernetworks.get(hypernetwork_name, None)
shared.loaded_hypernetwork = Hypernetwork()
@@ -379,7 +369,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
else:
hypernetwork_dir = None
- hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0]
if create_image_every > 0:
images_dir = os.path.join(log_directory, "images")
os.makedirs(images_dir, exist_ok=True)
@@ -395,39 +384,34 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
return hypernetwork, filename
scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
-
+
# dataset loading may take a while, so input validations and early returns should be done before this
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
with torch.autocast("cuda"):
- ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)
+ ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width,
+ height=training_height,
+ repeats=shared.opts.training_image_repeats_per_epoch,
+ placeholder_token=hypernetwork_name,
+ model=shared.sd_model, device=devices.device,
+ template_file=template_file, include_cond=True,
+ batch_size=batch_size)
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
shared.sd_model.first_stage_model.to(devices.cpu)
size = len(ds.indexes)
- loss_dict = defaultdict(lambda : deque(maxlen = 1024))
+ loss_dict = defaultdict(lambda: deque(maxlen=1024))
losses = torch.zeros((size,))
previous_mean_losses = [0]
previous_mean_loss = 0
print("Mean loss of {} elements".format(size))
-
+
weights = hypernetwork.weights()
for weight in weights:
weight.requires_grad = True
- # Here we use optimizer from saved HN, or we can specify as UI option.
- if (optimizer_name := hypernetwork.optimizer_name) in optimizer_dict:
- optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate)
- else:
- print(f"Optimizer type {optimizer_name} is not defined!")
- optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate)
- optimizer_name = 'AdamW'
- if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer.
- try:
- optimizer.load_state_dict(hypernetwork.optimizer_state_dict)
- except RuntimeError as e:
- print("Cannot resume from saved optimizer!")
- print(e)
+ # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc...
+ optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
steps_without_grad = 0
@@ -441,7 +425,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
if len(loss_dict) > 0:
previous_mean_losses = [i[-1] for i in loss_dict.values()]
previous_mean_loss = mean(previous_mean_losses)
-
+
scheduler.apply(optimizer, hypernetwork.step)
if scheduler.finished:
break
@@ -460,7 +444,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
losses[hypernetwork.step % losses.shape[0]] = loss.item()
for entry in entries:
loss_dict[entry.filename].append(loss.item())
-
+
optimizer.zero_grad()
weights[0].grad = None
loss.backward()
@@ -475,9 +459,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
steps_done = hypernetwork.step + 1
- if torch.isnan(losses[hypernetwork.step % losses.shape[0]]):
+ if torch.isnan(losses[hypernetwork.step % losses.shape[0]]):
raise RuntimeError("Loss diverged.")
-
+
if len(previous_mean_losses) > 1:
std = stdev(previous_mean_losses)
else:
@@ -489,11 +473,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
# Before saving, change name to match current checkpoint.
hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}'
last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt')
- hypernetwork.optimizer_name = optimizer_name
- if shared.opts.save_optimizer_state:
- hypernetwork.optimizer_state_dict = optimizer.state_dict()
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file)
- hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory.
+
textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
"loss": f"{previous_mean_loss:.7f}",
"learn_rate": scheduler.learn_rate
@@ -529,7 +510,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
preview_text = p.prompt
processed = processing.process_images(p)
- image = processed.images[0] if len(processed.images)>0 else None
+ image = processed.images[0] if len(processed.images) > 0 else None
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
@@ -537,7 +518,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
if image is not None:
shared.state.current_image = image
- last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
+ last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt,
+ shared.opts.samples_format, processed.infotexts[0],
+ p=p, forced_filename=forced_filename,
+ save_to_dirs=False)
last_saved_image += f", prompt: {preview_text}"
shared.state.job_no = hypernetwork.step
@@ -551,15 +535,12 @@ Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
"""
+
report_statistics(loss_dict)
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
- hypernetwork.optimizer_name = optimizer_name
- if shared.opts.save_optimizer_state:
- hypernetwork.optimizer_state_dict = optimizer.state_dict()
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename)
- del optimizer
- hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory.
+
return hypernetwork, filename
@@ -576,4 +557,4 @@ def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename):
hypernetwork.sd_checkpoint = old_sd_checkpoint
hypernetwork.sd_checkpoint_name = old_sd_checkpoint_name
hypernetwork.name = old_hypernetwork_name
- raise
+ raise
\ No newline at end of file
--
cgit v1.2.3
From 0d07cbfa15d34294a4fa22d74359cdd6fe2f799c Mon Sep 17 00:00:00 2001
From: AngelBottomless <35677394+aria1th@users.noreply.github.com>
Date: Fri, 4 Nov 2022 15:50:54 +0900
Subject: I blame code autocomplete
---
modules/hypernetworks/hypernetwork.py | 76 +++++++++++++----------------------
1 file changed, 27 insertions(+), 49 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 674fcedd..a11e01d6 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -33,12 +33,9 @@ class HypernetworkModule(torch.nn.Module):
"tanh": torch.nn.Tanh,
"sigmoid": torch.nn.Sigmoid,
}
- activation_dict.update(
- {cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if
- inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
+ activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
- def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
- add_layer_norm=False, use_dropout=False):
+ def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
@@ -49,7 +46,7 @@ class HypernetworkModule(torch.nn.Module):
for i in range(len(layer_structure) - 1):
# Add a fully-connected layer
- linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i + 1])))
+ linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1])))
# Add an activation func
if activation_func == "linear" or activation_func is None:
@@ -61,7 +58,7 @@ class HypernetworkModule(torch.nn.Module):
# Add layer normalization
if add_layer_norm:
- linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i + 1])))
+ linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
# Add dropout expect last layer
if use_dropout and i < len(layer_structure) - 3:
@@ -130,8 +127,7 @@ class Hypernetwork:
filename = None
name = None
- def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None,
- add_layer_norm=False, use_dropout=False):
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
self.filename = None
self.name = name
self.layers = {}
@@ -146,10 +142,8 @@ class Hypernetwork:
for size in enable_sizes or []:
self.layers[size] = (
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
- self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
- self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
)
def weights(self):
@@ -196,15 +190,13 @@ class Hypernetwork:
self.add_layer_norm = state_dict.get('is_layer_norm', False)
print(f"Layer norm is set to {self.add_layer_norm}")
self.use_dropout = state_dict.get('use_dropout', False)
- print(f"Dropout usage is set to {self.use_dropout}")
+ print(f"Dropout usage is set to {self.use_dropout}" )
for size, sd in state_dict.items():
if type(size) == int:
self.layers[size] = (
- HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init,
- self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init,
- self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
)
self.name = state_dict.get('name', self.name)
@@ -316,7 +308,7 @@ def statistics(data):
std = 0
else:
std = stdev(data)
- total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std / (len(data) ** 0.5):.3f})"
+ total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std/ (len(data) ** 0.5):.3f})"
recent_data = data[-32:]
if len(recent_data) < 2:
std = 0
@@ -326,7 +318,7 @@ def statistics(data):
return total_information, recent_information
-def report_statistics(loss_info: dict):
+def report_statistics(loss_info:dict):
keys = sorted(loss_info.keys(), key=lambda x: sum(loss_info[x]) / len(loss_info[x]))
for key in keys:
try:
@@ -338,18 +330,14 @@ def report_statistics(loss_info: dict):
print(e)
-def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width,
- training_height, steps, create_image_every, save_hypernetwork_every, template_file,
- preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps,
- preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+
+def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
# images allows training previews to have infotext. Importing it at the top causes a circular import problem.
from modules import images
save_hypernetwork_every = save_hypernetwork_every or 0
create_image_every = create_image_every or 0
- textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, data_root, template_file, steps,
- save_hypernetwork_every, create_image_every, log_directory,
- name="hypernetwork")
+ textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, data_root, template_file, steps, save_hypernetwork_every, create_image_every, log_directory, name="hypernetwork")
path = shared.hypernetworks.get(hypernetwork_name, None)
shared.loaded_hypernetwork = Hypernetwork()
@@ -384,29 +372,23 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
return hypernetwork, filename
scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
-
+
# dataset loading may take a while, so input validations and early returns should be done before this
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
with torch.autocast("cuda"):
- ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width,
- height=training_height,
- repeats=shared.opts.training_image_repeats_per_epoch,
- placeholder_token=hypernetwork_name,
- model=shared.sd_model, device=devices.device,
- template_file=template_file, include_cond=True,
- batch_size=batch_size)
+ ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
shared.sd_model.first_stage_model.to(devices.cpu)
size = len(ds.indexes)
- loss_dict = defaultdict(lambda: deque(maxlen=1024))
+ loss_dict = defaultdict(lambda : deque(maxlen = 1024))
losses = torch.zeros((size,))
previous_mean_losses = [0]
previous_mean_loss = 0
print("Mean loss of {} elements".format(size))
-
+
weights = hypernetwork.weights()
for weight in weights:
weight.requires_grad = True
@@ -425,7 +407,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
if len(loss_dict) > 0:
previous_mean_losses = [i[-1] for i in loss_dict.values()]
previous_mean_loss = mean(previous_mean_losses)
-
+
scheduler.apply(optimizer, hypernetwork.step)
if scheduler.finished:
break
@@ -444,7 +426,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
losses[hypernetwork.step % losses.shape[0]] = loss.item()
for entry in entries:
loss_dict[entry.filename].append(loss.item())
-
+
optimizer.zero_grad()
weights[0].grad = None
loss.backward()
@@ -459,9 +441,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
steps_done = hypernetwork.step + 1
- if torch.isnan(losses[hypernetwork.step % losses.shape[0]]):
+ if torch.isnan(losses[hypernetwork.step % losses.shape[0]]):
raise RuntimeError("Loss diverged.")
-
+
if len(previous_mean_losses) > 1:
std = stdev(previous_mean_losses)
else:
@@ -510,7 +492,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
preview_text = p.prompt
processed = processing.process_images(p)
- image = processed.images[0] if len(processed.images) > 0 else None
+ image = processed.images[0] if len(processed.images)>0 else None
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
@@ -518,10 +500,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
if image is not None:
shared.state.current_image = image
- last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt,
- shared.opts.samples_format, processed.infotexts[0],
- p=p, forced_filename=forced_filename,
- save_to_dirs=False)
+ last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
last_saved_image += f", prompt: {preview_text}"
shared.state.job_no = hypernetwork.step
@@ -535,7 +514,7 @@ Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
"""
-
+
report_statistics(loss_dict)
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
@@ -543,7 +522,6 @@ Last saved image: {html.escape(last_saved_image)}
return hypernetwork, filename
-
def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename):
old_hypernetwork_name = hypernetwork.name
old_sd_checkpoint = hypernetwork.sd_checkpoint if hasattr(hypernetwork, "sd_checkpoint") else None
@@ -557,4 +535,4 @@ def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename):
hypernetwork.sd_checkpoint = old_sd_checkpoint
hypernetwork.sd_checkpoint_name = old_sd_checkpoint_name
hypernetwork.name = old_hypernetwork_name
- raise
\ No newline at end of file
+ raise
--
cgit v1.2.3
From 283249d2390f0f3a1c8a55d5d9aa551e3e9b2f9c Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Fri, 4 Nov 2022 15:57:17 +0900
Subject: apply
---
modules/hypernetworks/hypernetwork.py | 54 +++++++++++++++++++++++++++++++----
1 file changed, 49 insertions(+), 5 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 6e1a10cf..de8688a9 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -22,6 +22,8 @@ from collections import defaultdict, deque
from statistics import stdev, mean
+optimizer_dict = {optim_name : cls_obj for optim_name, cls_obj in inspect.getmembers(torch.optim, inspect.isclass) if optim_name != "Optimizer"}
+
class HypernetworkModule(torch.nn.Module):
multiplier = 1.0
activation_dict = {
@@ -142,6 +144,8 @@ class Hypernetwork:
self.use_dropout = use_dropout
self.activate_output = activate_output
self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True
+ self.optimizer_name = None
+ self.optimizer_state_dict = None
for size in enable_sizes or []:
self.layers[size] = (
@@ -163,6 +167,7 @@ class Hypernetwork:
def save(self, filename):
state_dict = {}
+ optimizer_saved_dict = {}
for k, v in self.layers.items():
state_dict[k] = (v[0].state_dict(), v[1].state_dict())
@@ -178,8 +183,15 @@ class Hypernetwork:
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
state_dict['activate_output'] = self.activate_output
state_dict['last_layer_dropout'] = self.last_layer_dropout
-
+
+ if self.optimizer_name is not None:
+ optimizer_saved_dict['optimizer_name'] = self.optimizer_name
+
torch.save(state_dict, filename)
+ if self.optimizer_state_dict:
+ optimizer_saved_dict['hash'] = sd_models.model_hash(filename)
+ optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict
+ torch.save(optimizer_saved_dict, filename + '.optim')
def load(self, filename):
self.filename = filename
@@ -202,6 +214,18 @@ class Hypernetwork:
print(f"Activate last layer is set to {self.activate_output}")
self.last_layer_dropout = state_dict.get('last_layer_dropout', False)
+ optimizer_saved_dict = torch.load(self.filename + '.optim', map_location = 'cpu') if os.path.exists(self.filename + '.optim') else {}
+ self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW')
+ print(f"Optimizer name is {self.optimizer_name}")
+ if sd_models.model_hash(filename) == optimizer_saved_dict.get('hash', None):
+ self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None)
+ else:
+ self.optimizer_state_dict = None
+ if self.optimizer_state_dict:
+ print("Loaded existing optimizer from checkpoint")
+ else:
+ print("No saved optimizer exists in checkpoint")
+
for size, sd in state_dict.items():
if type(size) == int:
self.layers[size] = (
@@ -223,7 +247,7 @@ def list_hypernetworks(path):
name = os.path.splitext(os.path.basename(filename))[0]
# Prevent a hypothetical "None.pt" from being listed.
if name != "None":
- res[name] = filename
+ res[name + f"({sd_models.model_hash(filename)})"] = filename
return res
@@ -369,6 +393,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
else:
hypernetwork_dir = None
+ hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0]
if create_image_every > 0:
images_dir = os.path.join(log_directory, "images")
os.makedirs(images_dir, exist_ok=True)
@@ -404,8 +429,19 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
weights = hypernetwork.weights()
for weight in weights:
weight.requires_grad = True
- # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc...
- optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
+ # Here we use optimizer from saved HN, or we can specify as UI option.
+ if (optimizer_name := hypernetwork.optimizer_name) in optimizer_dict:
+ optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate)
+ else:
+ print(f"Optimizer type {optimizer_name} is not defined!")
+ optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate)
+ optimizer_name = 'AdamW'
+ if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer.
+ try:
+ optimizer.load_state_dict(hypernetwork.optimizer_state_dict)
+ except RuntimeError as e:
+ print("Cannot resume from saved optimizer!")
+ print(e)
steps_without_grad = 0
@@ -467,7 +503,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
# Before saving, change name to match current checkpoint.
hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}'
last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt')
+ hypernetwork.optimizer_name = optimizer_name
+ if shared.opts.save_optimizer_state:
+ hypernetwork.optimizer_state_dict = optimizer.state_dict()
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file)
+ hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory.
textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
"loss": f"{previous_mean_loss:.7f}",
@@ -530,8 +570,12 @@ Last saved image: {html.escape(last_saved_image)}
report_statistics(loss_dict)
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
+ hypernetwork.optimizer_name = optimizer_name
+ if shared.opts.save_optimizer_state:
+ hypernetwork.optimizer_state_dict = optimizer.state_dict()
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename)
-
+ del optimizer
+ hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory.
return hypernetwork, filename
def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename):
--
cgit v1.2.3
From f5d394214d6ee74a682d0a1016bcbebc4b43c13a Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Fri, 4 Nov 2022 16:04:03 +0900
Subject: split before declaring file name
---
modules/hypernetworks/hypernetwork.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index de8688a9..9b6a3e62 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -382,6 +382,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
shared.state.textinfo = "Initializing hypernetwork training..."
shared.state.job_count = steps
+ hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0]
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name)
@@ -393,7 +394,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
else:
hypernetwork_dir = None
- hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0]
if create_image_every > 0:
images_dir = os.path.join(log_directory, "images")
os.makedirs(images_dir, exist_ok=True)
--
cgit v1.2.3
From 1ca0bcd3a7003dd2c1324de7d97fd2a6fc5ddc53 Mon Sep 17 00:00:00 2001
From: aria1th <35677394+aria1th@users.noreply.github.com>
Date: Fri, 4 Nov 2022 16:09:19 +0900
Subject: only save if option is enabled
---
modules/hypernetworks/hypernetwork.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 9b6a3e62..b1f308e2 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -188,7 +188,7 @@ class Hypernetwork:
optimizer_saved_dict['optimizer_name'] = self.optimizer_name
torch.save(state_dict, filename)
- if self.optimizer_state_dict:
+ if shared.opts.save_optimizer_state and self.optimizer_state_dict:
optimizer_saved_dict['hash'] = sd_models.model_hash(filename)
optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict
torch.save(optimizer_saved_dict, filename + '.optim')
--
cgit v1.2.3
From fd62727893f9face287b0a9620251afaa38a627d Mon Sep 17 00:00:00 2001
From: Isaac Poulton
Date: Fri, 4 Nov 2022 18:34:35 +0700
Subject: Sort hypernetworks
---
modules/hypernetworks/hypernetwork.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 6e1a10cf..f1f04a70 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -224,7 +224,7 @@ def list_hypernetworks(path):
# Prevent a hypothetical "None.pt" from being listed.
if name != "None":
res[name] = filename
- return res
+ return dict(sorted(res.items()))
def load_hypernetwork(filename):
--
cgit v1.2.3
From 08feb4c364e8b2aed929fd7d22dfa21a93d78b2c Mon Sep 17 00:00:00 2001
From: Isaac Poulton
Date: Fri, 4 Nov 2022 20:53:11 +0700
Subject: Sort straight out of the glob
---
modules/hypernetworks/hypernetwork.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index f1f04a70..a441ab10 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -219,12 +219,12 @@ class Hypernetwork:
def list_hypernetworks(path):
res = {}
- for filename in glob.iglob(os.path.join(path, '**/*.pt'), recursive=True):
+ for filename in sorted(glob.iglob(os.path.join(path, '**/*.pt'), recursive=True)):
name = os.path.splitext(os.path.basename(filename))[0]
# Prevent a hypothetical "None.pt" from being listed.
if name != "None":
res[name] = filename
- return dict(sorted(res.items()))
+ return res
def load_hypernetwork(filename):
--
cgit v1.2.3
From 62e3d71aa778928d63cab81d9d8cde33e55bebb3 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 5 Nov 2022 17:09:42 +0300
Subject: rework the code to not use the walrus operator because colab's 3.7
does not support it
---
modules/hypernetworks/hypernetwork.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'modules/hypernetworks')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 5ceed6ee..7f182712 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -429,13 +429,16 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
weights = hypernetwork.weights()
for weight in weights:
weight.requires_grad = True
+
# Here we use optimizer from saved HN, or we can specify as UI option.
- if (optimizer_name := hypernetwork.optimizer_name) in optimizer_dict:
+ if hypernetwork.optimizer_name in optimizer_dict:
optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate)
+ optimizer_name = hypernetwork.optimizer_name
else:
- print(f"Optimizer type {optimizer_name} is not defined!")
+ print(f"Optimizer type {hypernetwork.optimizer_name} is not defined!")
optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate)
optimizer_name = 'AdamW'
+
if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer.
try:
optimizer.load_state_dict(hypernetwork.optimizer_state_dict)
--
cgit v1.2.3