diff options
author | Melan <alexleander91@gmail.com> | 2022-10-20 20:37:16 +0000 |
---|---|---|
committer | Melan <alexleander91@gmail.com> | 2022-10-20 20:37:16 +0000 |
commit | 8f5912984794c4c69e429c4636e984854d911b6a (patch) | |
tree | a5c45c4fc4d212c38f2d0b70cd013d0b2ce69354 /modules/hypernetworks | |
parent | a6d593a6b51dc6a8443f2aa5c24caa391a04cd56 (diff) | |
download | stable-diffusion-webui-gfx803-8f5912984794c4c69e429c4636e984854d911b6a.tar.gz stable-diffusion-webui-gfx803-8f5912984794c4c69e429c4636e984854d911b6a.tar.bz2 stable-diffusion-webui-gfx803-8f5912984794c4c69e429c4636e984854d911b6a.zip |
Some changes to the tensorboard code and hypernetwork support
Diffstat (limited to 'modules/hypernetworks')
-rw-r--r-- | modules/hypernetworks/hypernetwork.py | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 74300122..5e919775 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -4,6 +4,7 @@ import html import os
import sys
import traceback
+import tensorboard
import tqdm
import csv
@@ -18,7 +19,6 @@ import modules.textual_inversion.dataset from modules.textual_inversion import textual_inversion
from modules.textual_inversion.learn_schedule import LearnRateScheduler
-
class HypernetworkModule(torch.nn.Module):
multiplier = 1.0
@@ -291,6 +291,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
+ if shared.opts.training_enable_tensorboard:
+ tensorboard_writer = textual_inversion.tensorboard_setup(log_directory)
+
pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step)
for i, entries in pbar:
hypernetwork.step = i + ititial_step
@@ -315,6 +318,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer.zero_grad()
loss.backward()
optimizer.step()
+
mean_loss = losses.mean()
if torch.isnan(mean_loss):
raise RuntimeError("Loss diverged.")
@@ -323,6 +327,14 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt')
hypernetwork.save(last_saved_file)
+
+ if shared.opts.training_enable_tensorboard:
+ epoch_num = hypernetwork.step // len(ds)
+ epoch_step = hypernetwork.step - (epoch_num * len(ds)) + 1
+
+ textual_inversion.tensorboard_add(tensorboard_writer, loss=mean_loss,
+ global_step=hypernetwork.step, step=epoch_step,
+ learn_rate=scheduler.learn_rate, epoch_num=epoch_num)
textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
"loss": f"{mean_loss:.7f}",
@@ -360,6 +372,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log processed = processing.process_images(p)
image = processed.images[0] if len(processed.images)>0 else None
+ if shared.opts.training_enable_tensorboard and shared.opts.training_tensorboard_save_images:
+ textual_inversion.tensorboard_add_image(tensorboard_writer, f"Validation at epoch {epoch_num}",
+ image, hypernetwork.step)
+
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
shared.sd_model.first_stage_model.to(devices.cpu)
|