From 2a25729623717cc499e873752d9f4ebebd1e1078 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Fri, 28 Oct 2022 09:44:56 +0700 Subject: Gradient clipping in train tab --- modules/hypernetworks/hypernetwork.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8113b35b..c5d60654 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -327,7 +327,7 @@ def report_statistics(loss_info:dict): -def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images @@ -384,6 +384,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if ititial_step > steps: return hypernetwork, filename + clip_grad_mode_value = clip_grad_mode == "value" + clip_grad_mode_norm = clip_grad_mode == "norm" + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) @@ -426,6 +429,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log steps_without_grad = 0 assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' + if clip_grad_mode_value: + torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_value) + elif clip_grad_mode_norm: + torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_value) + optimizer.step() if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): -- cgit v1.2.3