From 1618df41bad092e068c61bf510b1e20856821ad5 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Fri, 28 Oct 2022 10:31:27 +0700 Subject: Gradient clipping for textual embedding --- modules/textual_inversion/textual_inversion.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'modules/textual_inversion/textual_inversion.py') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ff002d3e..7bad73a6 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -206,7 +206,7 @@ def write_loss(log_directory, filename, step, epoch_len, values): }) -def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -256,6 +256,9 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc if ititial_step > steps: return embedding, filename + clip_grad_mode_value = clip_grad_mode == "value" + clip_grad_mode_norm = clip_grad_mode == "norm" + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate) @@ -280,6 +283,12 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc optimizer.zero_grad() loss.backward() + + if clip_grad_mode_value: + torch.nn.utils.clip_grad_value_(embedding.vec, clip_value=clip_grad_value) + elif clip_grad_mode_norm: + torch.nn.utils.clip_grad_norm_(embedding.vec, max_norm=clip_grad_value) + optimizer.step() -- cgit v1.2.3 From 16451ca573220e49f2eaaab97580b6b91287c8c4 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Fri, 28 Oct 2022 17:16:23 +0700 Subject: Learning rate sched syntax support for grad clipping --- modules/textual_inversion/textual_inversion.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'modules/textual_inversion/textual_inversion.py') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 7bad73a6..6b00c6a1 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -255,9 +255,12 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc ititial_step = embedding.step or 0 if ititial_step > steps: return embedding, filename - + clip_grad_mode_value = clip_grad_mode == "value" clip_grad_mode_norm = clip_grad_mode == "norm" + clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm + if clip_grad_enabled: + clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False) scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate) @@ -273,6 +276,9 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc if shared.state.interrupted: break + if clip_grad_enabled: + clip_grad_sched.step(embedding.step) + with torch.autocast("cuda"): c = cond_model([entry.cond_text for entry in entries]) x = torch.stack([entry.latent for entry in entries]).to(devices.device) @@ -285,9 +291,9 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc loss.backward() if clip_grad_mode_value: - torch.nn.utils.clip_grad_value_(embedding.vec, clip_value=clip_grad_value) + torch.nn.utils.clip_grad_value_(embedding.vec, clip_value=clip_grad_sched.learn_rate) elif clip_grad_mode_norm: - torch.nn.utils.clip_grad_norm_(embedding.vec, max_norm=clip_grad_value) + torch.nn.utils.clip_grad_norm_(embedding.vec, max_norm=clip_grad_sched.learn_rate) optimizer.step() -- cgit v1.2.3 From bb832d7725187f8a8ab44faa6ee1b38cb5f600aa Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Sat, 5 Nov 2022 11:48:38 +0700 Subject: Simplify grad clip --- modules/textual_inversion/textual_inversion.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'modules/textual_inversion/textual_inversion.py') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index c567ec3f..687d97bb 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -269,10 +269,10 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - clip_grad_mode_value = clip_grad_mode == "value" - clip_grad_mode_norm = clip_grad_mode == "norm" - clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm - if clip_grad_enabled: + clip_grad = torch.nn.utils.clip_grad_value_ if clip_grad_mode == "value" else \ + torch.nn.utils.clip_grad_norm_ if clip_grad_mode == "norm" else \ + None + if clip_grad: clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False) # dataset loading may take a while, so input validations and early returns should be done before this shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." @@ -302,7 +302,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc if shared.state.interrupted: break - if clip_grad_enabled: + if clip_grad: clip_grad_sched.step(embedding.step) with torch.autocast("cuda"): @@ -316,10 +316,8 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc optimizer.zero_grad() loss.backward() - if clip_grad_mode_value: - torch.nn.utils.clip_grad_value_(embedding.vec, clip_value=clip_grad_sched.learn_rate) - elif clip_grad_mode_norm: - torch.nn.utils.clip_grad_norm_(embedding.vec, max_norm=clip_grad_sched.learn_rate) + if clip_grad: + clip_grad(embedding.vec, clip_grad_sched.learn_rate) optimizer.step() -- cgit v1.2.3