From eea8fc40e16664ddc8a9aec77206da704a35dde0 Mon Sep 17 00:00:00 2001
From: timntorres <timothynarcisotorres@gmail.com>
Date: Thu, 5 Jan 2023 07:24:22 -0800
Subject: Add option to save ti settings to file.

---
 modules/textual_inversion/textual_inversion.py | 30 +++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'modules/textual_inversion/textual_inversion.py')

diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 71e07bcc..2bed2ecb 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -1,6 +1,7 @@
 import os
 import sys
 import traceback
+import inspect
 
 import torch
 import tqdm
@@ -229,6 +230,28 @@ def write_loss(log_directory, filename, step, epoch_len, values):
             **values,
         })
 
+def save_settings_to_file(initial_step, num_of_dataset_images, embedding_name, vectors_per_token, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+    checkpoint = sd_models.select_checkpoint()
+    model_name = checkpoint.model_name
+    model_hash = '[{}]'.format(checkpoint.hash)
+
+    # Get a list of the argument names.
+    arg_names = inspect.getfullargspec(save_settings_to_file).args
+    
+    # Create a list of the argument names to include in the settings string.
+    names = arg_names[:16]  # Include all arguments up until the preview-related ones.
+    if preview_from_txt2img:
+        names.extend(arg_names[16:])  # Include all remaining arguments if `preview_from_txt2img` is True.
+
+    # Build the settings string.
+    settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
+    for name in names:
+        value = locals()[name]
+        settings_str += f"{name}: {value}\n"
+
+    with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout:
+        fout.write(settings_str + "\n\n")
+
 def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_model_every, create_image_every, log_directory, name="embedding"):
     assert model_name, f"{name} not selected"
     assert learn_rate, "Learning rate is empty or 0"
@@ -292,13 +315,13 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     if initial_step >= steps:
         shared.state.textinfo = "Model has already been trained beyond specified max steps"
         return embedding, filename
+    
     scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
-
     clip_grad = torch.nn.utils.clip_grad_value_ if clip_grad_mode == "value" else \
         torch.nn.utils.clip_grad_norm_ if clip_grad_mode == "norm" else \
         None
     if clip_grad:
-        clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False)
+        clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, initial_step, verbose=False)
     # dataset loading may take a while, so input validations and early returns should be done before this
     shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
     old_parallel_processing_allowed = shared.parallel_processing_allowed
@@ -306,7 +329,8 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     pin_memory = shared.opts.pin_memory
 
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
-
+    if shared.opts.save_train_settings_to_txt:
+            save_settings_to_file(initial_step , len(ds) , embedding_name, len(embedding.vec) , learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
     latent_sampling_method = ds.latent_sampling_method
 
     dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory)
-- 
cgit v1.2.3


From b85c2b5cf4a6809bc871718cf4680d49c3e95e94 Mon Sep 17 00:00:00 2001
From: timntorres <timothynarcisotorres@gmail.com>
Date: Thu, 5 Jan 2023 08:14:38 -0800
Subject: Clean up ti, add same behavior to hypernetwork.

---
 modules/hypernetworks/hypernetwork.py          | 31 +++++++++++++++++++++++++-
 modules/shared.py                              |  2 +-
 modules/textual_inversion/textual_inversion.py | 14 +++++++-----
 3 files changed, 40 insertions(+), 7 deletions(-)

(limited to 'modules/textual_inversion/textual_inversion.py')

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 6a9b1398..d5985263 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -401,7 +401,33 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None,
     hypernet.save(fn)
 
     shared.reload_hypernetworks()
+# Note: textual_inversion.py has a nearly identical function of the same name.
+def save_settings_to_file(initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+    checkpoint = sd_models.select_checkpoint()
+    model_name = checkpoint.model_name
+    model_hash = '[{}]'.format(checkpoint.hash)
+    # Starting index of preview-related arguments.
+    border_index = 19
+
+    # Get a list of the argument names, excluding default argument.
+    sig = inspect.signature(save_settings_to_file)
+    arg_names = [p.name for p in sig.parameters.values() if p.default == p.empty]
+    
+    # Create a list of the argument names to include in the settings string.
+    names = arg_names[:border_index]  # Include all arguments up until the preview-related ones.
+
+    # Include preview-related arguments if applicable.
+    if preview_from_txt2img:
+        names.extend(arg_names[border_index:])
+
+    # Build the settings string.
+    settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
+    for name in names:
+        value = locals()[name]
+        settings_str += f"{name}: {value}\n"
 
+    with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout:
+        fout.write(settings_str + "\n\n")
 
 def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
     # images allows training previews to have infotext. Importing it at the top causes a circular import problem.
@@ -457,7 +483,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
     pin_memory = shared.opts.pin_memory
 
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
-    
+
+    if shared.opts.save_training_settings_to_txt:
+        save_settings_to_file(initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+
     latent_sampling_method = ds.latent_sampling_method
 
     dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory)
diff --git a/modules/shared.py b/modules/shared.py
index 933cd738..10231a75 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -362,7 +362,7 @@ options_templates.update(options_section(('training', "Training"), {
     "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training if possible. Saves VRAM."),
     "pin_memory": OptionInfo(False, "Turn on pin_memory for DataLoader. Makes training slightly faster but can increase memory usage."),
     "save_optimizer_state": OptionInfo(False, "Saves Optimizer state as separate *.optim file. Training of embedding or HN can be resumed with the matching optim file."),
-    "save_train_settings_to_txt": OptionInfo(False, "Save textual inversion and hypernet settings to a text file when training starts."),
+    "save_training_settings_to_txt": OptionInfo(False, "Save textual inversion and hypernet settings to a text file whenever training starts."),
     "dataset_filename_word_regex": OptionInfo("", "Filename word regex"),
     "dataset_filename_join_string": OptionInfo(" ", "Filename join string"),
     "training_image_repeats_per_epoch": OptionInfo(1, "Number of repeats for a single input image per epoch; used only for displaying epoch number", gr.Number, {"precision": 0}),
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 2bed2ecb..68648550 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -230,18 +230,20 @@ def write_loss(log_directory, filename, step, epoch_len, values):
             **values,
         })
 
+# Note: hypernetwork.py has a nearly identical function of the same name. 
 def save_settings_to_file(initial_step, num_of_dataset_images, embedding_name, vectors_per_token, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
     checkpoint = sd_models.select_checkpoint()
     model_name = checkpoint.model_name
     model_hash = '[{}]'.format(checkpoint.hash)
-
+    # Starting index of preview-related arguments.
+    border_index = 16
     # Get a list of the argument names.
     arg_names = inspect.getfullargspec(save_settings_to_file).args
     
     # Create a list of the argument names to include in the settings string.
-    names = arg_names[:16]  # Include all arguments up until the preview-related ones.
+    names = arg_names[:border_index]  # Include all arguments up until the preview-related ones.
     if preview_from_txt2img:
-        names.extend(arg_names[16:])  # Include all remaining arguments if `preview_from_txt2img` is True.
+        names.extend(arg_names[border_index:])  # Include all remaining arguments if `preview_from_txt2img` is True.
 
     # Build the settings string.
     settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
@@ -329,8 +331,10 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     pin_memory = shared.opts.pin_memory
 
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
-    if shared.opts.save_train_settings_to_txt:
-            save_settings_to_file(initial_step , len(ds) , embedding_name, len(embedding.vec) , learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+
+    if shared.opts.save_training_settings_to_txt:
+            save_settings_to_file(initial_step, len(ds), embedding_name, len(embedding.vec), learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+
     latent_sampling_method = ds.latent_sampling_method
 
     dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory)
-- 
cgit v1.2.3


From b6bab2f052b32c0ffebe6aecc1819ccf20cf8c5d Mon Sep 17 00:00:00 2001
From: timntorres <timothynarcisotorres@gmail.com>
Date: Thu, 5 Jan 2023 09:14:56 -0800
Subject: Include model in log file. Exclude directory.

---
 modules/hypernetworks/hypernetwork.py          | 28 +++++++++-----------------
 modules/textual_inversion/textual_inversion.py | 22 +++++++++-----------
 2 files changed, 19 insertions(+), 31 deletions(-)

(limited to 'modules/textual_inversion/textual_inversion.py')

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index d5985263..3237c37a 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -402,30 +402,22 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None,
 
     shared.reload_hypernetworks()
 # Note: textual_inversion.py has a nearly identical function of the same name.
-def save_settings_to_file(initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
-    checkpoint = sd_models.select_checkpoint()
-    model_name = checkpoint.model_name
-    model_hash = '[{}]'.format(checkpoint.hash)
+def save_settings_to_file(model_name, model_hash, initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
     # Starting index of preview-related arguments.
-    border_index = 19
-
-    # Get a list of the argument names, excluding default argument.
-    sig = inspect.signature(save_settings_to_file)
-    arg_names = [p.name for p in sig.parameters.values() if p.default == p.empty]
-    
+    border_index = 21
+    # Get a list of the argument names.
+    arg_names = inspect.getfullargspec(save_settings_to_file).args
     # Create a list of the argument names to include in the settings string.
     names = arg_names[:border_index]  # Include all arguments up until the preview-related ones.
-
-    # Include preview-related arguments if applicable.
     if preview_from_txt2img:
-        names.extend(arg_names[border_index:])
-
+        names.extend(arg_names[border_index:])  # Include preview-related arguments if applicable.
     # Build the settings string.
     settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
     for name in names:
-        value = locals()[name]
-        settings_str += f"{name}: {value}\n"
-
+        if name != 'log_directory': # It's useless and redundant to save log_directory.
+            value = locals()[name]
+            settings_str += f"{name}: {value}\n"
+    # Create or append to the file.
     with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout:
         fout.write(settings_str + "\n\n")
 
@@ -485,7 +477,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
 
     if shared.opts.save_training_settings_to_txt:
-        save_settings_to_file(initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+        save_settings_to_file(checkpoint.model_name, '[{}]'.format(checkpoint.hash), initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
 
     latent_sampling_method = ds.latent_sampling_method
 
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 68648550..ce7e4f5d 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -231,26 +231,22 @@ def write_loss(log_directory, filename, step, epoch_len, values):
         })
 
 # Note: hypernetwork.py has a nearly identical function of the same name. 
-def save_settings_to_file(initial_step, num_of_dataset_images, embedding_name, vectors_per_token, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
-    checkpoint = sd_models.select_checkpoint()
-    model_name = checkpoint.model_name
-    model_hash = '[{}]'.format(checkpoint.hash)
+def save_settings_to_file(model_name, model_hash, initial_step, num_of_dataset_images, embedding_name, vectors_per_token, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
     # Starting index of preview-related arguments.
-    border_index = 16
+    border_index = 18
     # Get a list of the argument names.
-    arg_names = inspect.getfullargspec(save_settings_to_file).args
-    
+    arg_names = inspect.getfullargspec(save_settings_to_file).args    
     # Create a list of the argument names to include in the settings string.
     names = arg_names[:border_index]  # Include all arguments up until the preview-related ones.
     if preview_from_txt2img:
-        names.extend(arg_names[border_index:])  # Include all remaining arguments if `preview_from_txt2img` is True.
-
+        names.extend(arg_names[border_index:])  # Include preview-related arguments if applicable.
     # Build the settings string.
     settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
     for name in names:
-        value = locals()[name]
-        settings_str += f"{name}: {value}\n"
-
+        if name != 'log_directory': # It's useless and redundant to save log_directory.
+            value = locals()[name]
+            settings_str += f"{name}: {value}\n"
+    # Create or append to the file.
     with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout:
         fout.write(settings_str + "\n\n")
 
@@ -333,7 +329,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
 
     if shared.opts.save_training_settings_to_txt:
-            save_settings_to_file(initial_step, len(ds), embedding_name, len(embedding.vec), learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+            save_settings_to_file(checkpoint.model_name, '[{}]'.format(checkpoint.hash), initial_step, len(ds), embedding_name, len(embedding.vec), learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
 
     latent_sampling_method = ds.latent_sampling_method
 
-- 
cgit v1.2.3


From 81133d4168ae0bae9bf8bf1a1d4983319a589112 Mon Sep 17 00:00:00 2001
From: Faber <faber8164@gmail.com>
Date: Fri, 6 Jan 2023 03:38:37 +0700
Subject: allow loading embeddings from subdirectories

---
 modules/textual_inversion/textual_inversion.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'modules/textual_inversion/textual_inversion.py')

diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 24b43045..0a059044 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -149,19 +149,20 @@ class EmbeddingDatabase:
             else:
                 self.skipped_embeddings[name] = embedding
 
-        for fn in os.listdir(self.embeddings_dir):
-            try:
-                fullfn = os.path.join(self.embeddings_dir, fn)
-
-                if os.stat(fullfn).st_size == 0:
+        for root, dirs, fns in os.walk(self.embeddings_dir):
+            for fn in fns:
+                try:
+                    fullfn = os.path.join(root, fn)
+
+                    if os.stat(fullfn).st_size == 0:
+                        continue
+
+                    process_file(fullfn, fn)
+                except Exception:
+                    print(f"Error loading embedding {fn}:", file=sys.stderr)
+                    print(traceback.format_exc(), file=sys.stderr)
                     continue
 
-                process_file(fullfn, fn)
-            except Exception:
-                print(f"Error loading embedding {fn}:", file=sys.stderr)
-                print(traceback.format_exc(), file=sys.stderr)
-                continue
-
         print(f"Textual inversion embeddings loaded({len(self.word_embeddings)}): {', '.join(self.word_embeddings.keys())}")
         if len(self.skipped_embeddings) > 0:
             print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings.keys())}")
-- 
cgit v1.2.3


From 683287d87f6401083a8d63eedc00ca7410214ca1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Fri, 6 Jan 2023 08:52:06 +0300
Subject: rework saving training params to file #6372

---
 modules/hypernetworks/hypernetwork.py          | 28 +++++++-------------------
 modules/shared.py                              |  2 +-
 modules/textual_inversion/logging.py           | 24 ++++++++++++++++++++++
 modules/textual_inversion/textual_inversion.py | 23 +++------------------
 4 files changed, 35 insertions(+), 42 deletions(-)
 create mode 100644 modules/textual_inversion/logging.py

(limited to 'modules/textual_inversion/textual_inversion.py')

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 3237c37a..b0cfbe71 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -13,7 +13,7 @@ import tqdm
 from einops import rearrange, repeat
 from ldm.util import default
 from modules import devices, processing, sd_models, shared, sd_samplers
-from modules.textual_inversion import textual_inversion
+from modules.textual_inversion import textual_inversion, logging
 from modules.textual_inversion.learn_schedule import LearnRateScheduler
 from torch import einsum
 from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_
@@ -401,25 +401,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None,
     hypernet.save(fn)
 
     shared.reload_hypernetworks()
-# Note: textual_inversion.py has a nearly identical function of the same name.
-def save_settings_to_file(model_name, model_hash, initial_step, num_of_dataset_images, hypernetwork_name, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
-    # Starting index of preview-related arguments.
-    border_index = 21
-    # Get a list of the argument names.
-    arg_names = inspect.getfullargspec(save_settings_to_file).args
-    # Create a list of the argument names to include in the settings string.
-    names = arg_names[:border_index]  # Include all arguments up until the preview-related ones.
-    if preview_from_txt2img:
-        names.extend(arg_names[border_index:])  # Include preview-related arguments if applicable.
-    # Build the settings string.
-    settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
-    for name in names:
-        if name != 'log_directory': # It's useless and redundant to save log_directory.
-            value = locals()[name]
-            settings_str += f"{name}: {value}\n"
-    # Create or append to the file.
-    with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout:
-        fout.write(settings_str + "\n\n")
+
 
 def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
     # images allows training previews to have infotext. Importing it at the top causes a circular import problem.
@@ -477,7 +459,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
 
     if shared.opts.save_training_settings_to_txt:
-        save_settings_to_file(checkpoint.model_name, '[{}]'.format(checkpoint.hash), initial_step, len(ds), hypernetwork_name, hypernetwork.layer_structure, hypernetwork.activation_func, hypernetwork.weight_init, hypernetwork.add_layer_norm, hypernetwork.use_dropout, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+        saved_params = dict(
+            model_name=checkpoint.model_name, model_hash=checkpoint.hash, num_of_dataset_images=len(ds),
+            **{field: getattr(hypernetwork, field) for field in ['layer_structure', 'activation_func', 'weight_init', 'add_layer_norm', 'use_dropout', ]}
+        )
+        logging.save_settings_to_file(log_directory, {**saved_params, **locals()})
 
     latent_sampling_method = ds.latent_sampling_method
 
diff --git a/modules/shared.py b/modules/shared.py
index f0e10b35..57e489d0 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -362,7 +362,7 @@ options_templates.update(options_section(('training', "Training"), {
     "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training if possible. Saves VRAM."),
     "pin_memory": OptionInfo(False, "Turn on pin_memory for DataLoader. Makes training slightly faster but can increase memory usage."),
     "save_optimizer_state": OptionInfo(False, "Saves Optimizer state as separate *.optim file. Training of embedding or HN can be resumed with the matching optim file."),
-    "save_training_settings_to_txt": OptionInfo(False, "Save textual inversion and hypernet settings to a text file whenever training starts."),
+    "save_training_settings_to_txt": OptionInfo(True, "Save textual inversion and hypernet settings to a text file whenever training starts."),
     "dataset_filename_word_regex": OptionInfo("", "Filename word regex"),
     "dataset_filename_join_string": OptionInfo(" ", "Filename join string"),
     "training_image_repeats_per_epoch": OptionInfo(1, "Number of repeats for a single input image per epoch; used only for displaying epoch number", gr.Number, {"precision": 0}),
diff --git a/modules/textual_inversion/logging.py b/modules/textual_inversion/logging.py
new file mode 100644
index 00000000..8b1981d5
--- /dev/null
+++ b/modules/textual_inversion/logging.py
@@ -0,0 +1,24 @@
+import datetime
+import json
+import os
+
+saved_params_shared = {"model_name", "model_hash", "initial_step", "num_of_dataset_images", "learn_rate", "batch_size", "data_root", "log_directory", "training_width", "training_height", "steps", "create_image_every", "template_file"}
+saved_params_ti = {"embedding_name", "num_vectors_per_token", "save_embedding_every", "save_image_with_stored_embedding"}
+saved_params_hypernet = {"hypernetwork_name", "layer_structure", "activation_func", "weight_init", "add_layer_norm", "use_dropout", "save_hypernetwork_every"}
+saved_params_all = saved_params_shared | saved_params_ti | saved_params_hypernet
+saved_params_previews = {"preview_prompt", "preview_negative_prompt", "preview_steps", "preview_sampler_index", "preview_cfg_scale", "preview_seed", "preview_width", "preview_height"}
+
+
+def save_settings_to_file(log_directory, all_params):
+    now = datetime.datetime.now()
+    params = {"datetime": now.strftime("%Y-%m-%d %H:%M:%S")}
+
+    keys = saved_params_all
+    if all_params.get('preview_from_txt2img'):
+        keys = keys | saved_params_previews
+
+    params.update({k: v for k, v in all_params.items() if k in keys})
+
+    filename = f'settings-{now.strftime("%Y-%m-%d-%H-%M-%S")}.json'
+    with open(os.path.join(log_directory, filename), "w") as file:
+        json.dump(params, file, indent=4)
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index e9cf432f..f9f5e8cd 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -18,6 +18,8 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler
 from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64,
                                                        insert_image_data_embed, extract_image_data_embed,
                                                        caption_image_overlay)
+from modules.textual_inversion.logging import save_settings_to_file
+
 
 class Embedding:
     def __init__(self, vec, name, step=None):
@@ -231,25 +233,6 @@ def write_loss(log_directory, filename, step, epoch_len, values):
             **values,
         })
 
-# Note: hypernetwork.py has a nearly identical function of the same name. 
-def save_settings_to_file(model_name, model_hash, initial_step, num_of_dataset_images, embedding_name, vectors_per_token, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
-    # Starting index of preview-related arguments.
-    border_index = 18
-    # Get a list of the argument names.
-    arg_names = inspect.getfullargspec(save_settings_to_file).args    
-    # Create a list of the argument names to include in the settings string.
-    names = arg_names[:border_index]  # Include all arguments up until the preview-related ones.
-    if preview_from_txt2img:
-        names.extend(arg_names[border_index:])  # Include preview-related arguments if applicable.
-    # Build the settings string.
-    settings_str = "datetime : " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n"
-    for name in names:
-        if name != 'log_directory': # It's useless and redundant to save log_directory.
-            value = locals()[name]
-            settings_str += f"{name}: {value}\n"
-    # Create or append to the file.
-    with open(os.path.join(log_directory, 'settings.txt'), "a+") as fout:
-        fout.write(settings_str + "\n\n")
 
 def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_model_every, create_image_every, log_directory, name="embedding"):
     assert model_name, f"{name} not selected"
@@ -330,7 +313,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
     ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
 
     if shared.opts.save_training_settings_to_txt:
-            save_settings_to_file(checkpoint.model_name, '[{}]'.format(checkpoint.hash), initial_step, len(ds), embedding_name, len(embedding.vec), learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height)
+        save_settings_to_file(log_directory, {**dict(model_name=checkpoint.model_name, model_hash=checkpoint.hash, num_of_dataset_images=len(ds), num_vectors_per_token=len(embedding.vec)), **locals()})
 
     latent_sampling_method = ds.latent_sampling_method
 
-- 
cgit v1.2.3


From 79e39fae6110c20a3ee6255e2841c877f65e8cbd Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 7 Jan 2023 01:45:28 +0300
Subject: CLIP hijack rework

---
 modules/sd_hijack.py                           |   6 +-
 modules/sd_hijack_clip.py                      | 348 ++++++++++++-------------
 modules/sd_hijack_clip_old.py                  |  81 ++++++
 modules/textual_inversion/textual_inversion.py |   1 -
 modules/ui.py                                  |   2 +-
 5 files changed, 256 insertions(+), 182 deletions(-)
 create mode 100644 modules/sd_hijack_clip_old.py

(limited to 'modules/textual_inversion/textual_inversion.py')

diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index fa2cd4bb..71cc145a 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -150,10 +150,10 @@ class StableDiffusionModelHijack:
     def clear_comments(self):
         self.comments = []
 
-    def tokenize(self, text):
-        _, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text])
+    def get_prompt_lengths(self, text):
+        _, token_count = self.clip.process_texts([text])
 
-        return remade_batch_tokens[0], token_count, sd_hijack_clip.get_target_prompt_token_count(token_count)
+        return token_count, self.clip.get_target_prompt_token_count(token_count)
 
 
 class EmbeddingsWithFixes(torch.nn.Module):
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index ca92b142..ac3020d7 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -1,12 +1,28 @@
 import math
+from collections import namedtuple
 
 import torch
 
 from modules import prompt_parser, devices
 from modules.shared import opts
 
-def get_target_prompt_token_count(token_count):
-    return math.ceil(max(token_count, 1) / 75) * 75
+
+class PromptChunk:
+    """
+    This object contains token ids, weight (multipliers:1.4) and textual inversion embedding info for a chunk of prompt.
+    If a prompt is short, it is represented by one PromptChunk, otherwise, multiple are necessary.
+    Each PromptChunk contains an exact amount of tokens - 77, which includes one for start and end token,
+    so just 75 tokens from prompt.
+    """
+
+    def __init__(self):
+        self.tokens = []
+        self.multipliers = []
+        self.fixes = []
+
+
+PromptChunkFix = namedtuple('PromptChunkFix', ['offset', 'embedding'])
+"""This is a marker showing that textual inversion embedding's vectors have to be placed at offset in the prompt chunk"""
 
 
 class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
@@ -14,17 +30,49 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         super().__init__()
         self.wrapped = wrapped
         self.hijack = hijack
+        self.chunk_length = 75
+
+    def empty_chunk(self):
+        """creates an empty PromptChunk and returns it"""
+
+        chunk = PromptChunk()
+        chunk.tokens = [self.id_start] + [self.id_end] * (self.chunk_length + 1)
+        chunk.multipliers = [1.0] * (self.chunk_length + 2)
+        return chunk
+
+    def get_target_prompt_token_count(self, token_count):
+        """returns the maximum number of tokens a prompt of a known length can have before it requires one more PromptChunk to be represented"""
+
+        return math.ceil(max(token_count, 1) / self.chunk_length) * self.chunk_length
 
     def tokenize(self, texts):
+        """Converts a batch of texts into a batch of token ids"""
+
         raise NotImplementedError
 
     def encode_with_transformers(self, tokens):
+        """
+        converts a batch of token ids (in python lists) into a single tensor with numeric representation of those tokens;
+        All python lists with tokens are assumed to have same length, usually 77.
+        if input is a list with B elements and each element has T tokens, expected output shape is (B, T, C), where C depends on
+        model - can be 768 and 1024
+        """
+
         raise NotImplementedError
 
     def encode_embedding_init_text(self, init_text, nvpt):
+        """Converts text into a tensor with this text's tokens' embeddings. Note that those are embeddings before they are passed through
+        transformers. nvpt is used as a maximum length in tokens. If text produces fewer tokens than nvpt, only that many are returned."""
+
         raise NotImplementedError
 
-    def tokenize_line(self, line, used_custom_terms, hijack_comments):
+    def tokenize_line(self, line):
+        """
+        this transforms a single prompt into a list of PromptChunk objects - as many as needed to
+        represent the prompt.
+        Returns the list and the total number of tokens in the prompt.
+        """
+
         if opts.enable_emphasis:
             parsed = prompt_parser.parse_prompt_attention(line)
         else:
@@ -32,205 +80,152 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
 
         tokenized = self.tokenize([text for text, _ in parsed])
 
-        fixes = []
-        remade_tokens = []
-        multipliers = []
+        chunks = []
+        chunk = PromptChunk()
+        token_count = 0
         last_comma = -1
 
-        for tokens, (text, weight) in zip(tokenized, parsed):
-            i = 0
-            while i < len(tokens):
-                token = tokens[i]
+        def next_chunk():
+            """puts current chunk into the list of results and produces the next one - empty"""
+            nonlocal token_count
+            nonlocal last_comma
+            nonlocal chunk
+
+            token_count += len(chunk.tokens)
+            to_add = self.chunk_length - len(chunk.tokens)
+            if to_add > 0:
+                chunk.tokens += [self.id_end] * to_add
+                chunk.multipliers += [1.0] * to_add
 
-                embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
+            chunk.tokens = [self.id_start] + chunk.tokens + [self.id_end]
+            chunk.multipliers = [1.0] + chunk.multipliers + [1.0]
+
+            last_comma = -1
+            chunks.append(chunk)
+            chunk = PromptChunk()
+
+        for tokens, (text, weight) in zip(tokenized, parsed):
+            position = 0
+            while position < len(tokens):
+                token = tokens[position]
 
                 if token == self.comma_token:
-                    last_comma = len(remade_tokens)
-                elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), 1) % 75 == 0 and last_comma != -1 and len(remade_tokens) - last_comma <= opts.comma_padding_backtrack:
-                    last_comma += 1
-                    reloc_tokens = remade_tokens[last_comma:]
-                    reloc_mults = multipliers[last_comma:]
+                    last_comma = len(chunk.tokens)
+
+                # this is when we are at the end of allotted 75 tokens for the current chunk, and the current token is not a comma. opts.comma_padding_backtrack
+                # is a setting that specifies that if there is a comma nearby, the text after comma should be moved out of this chunk and into the next.
+                elif opts.comma_padding_backtrack != 0 and len(chunk.tokens) == self.chunk_length and last_comma != -1 and len(chunk.tokens) - last_comma <= opts.comma_padding_backtrack:
+                    break_location = last_comma + 1
+
+                    reloc_tokens = chunk.tokens[break_location:]
+                    reloc_mults = chunk.multipliers[break_location:]
 
-                    remade_tokens = remade_tokens[:last_comma]
-                    length = len(remade_tokens)
+                    chunk.tokens = chunk.tokens[:break_location]
+                    chunk.multipliers = chunk.multipliers[:break_location]
 
-                    rem = int(math.ceil(length / 75)) * 75 - length
-                    remade_tokens += [self.id_end] * rem + reloc_tokens
-                    multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults
+                    next_chunk()
+                    chunk.tokens = reloc_tokens
+                    chunk.multipliers = reloc_mults
 
+                if len(chunk.tokens) == self.chunk_length:
+                    next_chunk()
+
+                embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, position)
                 if embedding is None:
-                    remade_tokens.append(token)
-                    multipliers.append(weight)
-                    i += 1
-                else:
-                    emb_len = int(embedding.vec.shape[0])
-                    iteration = len(remade_tokens) // 75
-                    if (len(remade_tokens) + emb_len) // 75 != iteration:
-                        rem = (75 * (iteration + 1) - len(remade_tokens))
-                        remade_tokens += [self.id_end] * rem
-                        multipliers += [1.0] * rem
-                        iteration += 1
-                    fixes.append((iteration, (len(remade_tokens) % 75, embedding)))
-                    remade_tokens += [0] * emb_len
-                    multipliers += [weight] * emb_len
-                    used_custom_terms.append((embedding.name, embedding.checksum()))
-                    i += embedding_length_in_tokens
-
-        token_count = len(remade_tokens)
-        prompt_target_length = get_target_prompt_token_count(token_count)
-        tokens_to_add = prompt_target_length - len(remade_tokens)
-
-        remade_tokens = remade_tokens + [self.id_end] * tokens_to_add
-        multipliers = multipliers + [1.0] * tokens_to_add
-
-        return remade_tokens, fixes, multipliers, token_count
-
-    def process_text(self, texts):
-        used_custom_terms = []
-        remade_batch_tokens = []
-        hijack_comments = []
-        hijack_fixes = []
+                    chunk.tokens.append(token)
+                    chunk.multipliers.append(weight)
+                    position += 1
+                    continue
+
+                emb_len = int(embedding.vec.shape[0])
+                if len(chunk.tokens) + emb_len > self.chunk_length:
+                    next_chunk()
+
+                chunk.fixes.append(PromptChunkFix(len(chunk.tokens), embedding))
+
+                chunk.tokens += [0] * emb_len
+                chunk.multipliers += [weight] * emb_len
+                position += embedding_length_in_tokens
+
+        if len(chunk.tokens) > 0:
+            next_chunk()
+
+        return chunks, token_count
+
+    def process_texts(self, texts):
+        """
+        Accepts a list of texts and calls tokenize_line() on each, with cache. Returns the list of results and maximum
+        length, in tokens, of all texts.
+        """
+
         token_count = 0
 
         cache = {}
-        batch_multipliers = []
+        batch_chunks = []
         for line in texts:
             if line in cache:
-                remade_tokens, fixes, multipliers = cache[line]
+                chunks = cache[line]
             else:
-                remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments)
+                chunks, current_token_count = self.tokenize_line(line)
                 token_count = max(current_token_count, token_count)
 
-                cache[line] = (remade_tokens, fixes, multipliers)
+                cache[line] = chunks
 
-            remade_batch_tokens.append(remade_tokens)
-            hijack_fixes.append(fixes)
-            batch_multipliers.append(multipliers)
+            batch_chunks.append(chunks)
 
-        return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
+        return batch_chunks, token_count
 
-    def process_text_old(self, texts):
-        id_start = self.id_start
-        id_end = self.id_end
-        maxlen = self.wrapped.max_length  # you get to stay at 77
-        used_custom_terms = []
-        remade_batch_tokens = []
-        hijack_comments = []
-        hijack_fixes = []
-        token_count = 0
+    def forward(self, texts):
+        """
+        Accepts an array of texts; Passes texts through transformers network to create a tensor with numerical representation of those texts.
+        Returns a tensor with shape of (B, T, C), where B is length of the array; T is length, in tokens, of texts (including padding) - T will
+        be a multiple of 77; and C is dimensionality of each token - for SD1 it's 768, and for SD2 it's 1024.
+        An example shape returned by this function can be: (2, 77, 768).
+        Webui usually sends just one text at a time through this function - the only time when texts is an array with more than one element
+        is when you do prompt editing: "a picture of a [cat:dog:0.4] eating ice cream"
+        """
 
-        cache = {}
-        batch_tokens = self.tokenize(texts)
-        batch_multipliers = []
-        for tokens in batch_tokens:
-            tuple_tokens = tuple(tokens)
+        if opts.use_old_emphasis_implementation:
+            import modules.sd_hijack_clip_old
+            return modules.sd_hijack_clip_old.forward_old(self, texts)
 
-            if tuple_tokens in cache:
-                remade_tokens, fixes, multipliers = cache[tuple_tokens]
-            else:
-                fixes = []
-                remade_tokens = []
-                multipliers = []
-                mult = 1.0
-
-                i = 0
-                while i < len(tokens):
-                    token = tokens[i]
-
-                    embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
-
-                    mult_change = self.token_mults.get(token) if opts.enable_emphasis else None
-                    if mult_change is not None:
-                        mult *= mult_change
-                        i += 1
-                    elif embedding is None:
-                        remade_tokens.append(token)
-                        multipliers.append(mult)
-                        i += 1
-                    else:
-                        emb_len = int(embedding.vec.shape[0])
-                        fixes.append((len(remade_tokens), embedding))
-                        remade_tokens += [0] * emb_len
-                        multipliers += [mult] * emb_len
-                        used_custom_terms.append((embedding.name, embedding.checksum()))
-                        i += embedding_length_in_tokens
-
-                if len(remade_tokens) > maxlen - 2:
-                    vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
-                    ovf = remade_tokens[maxlen - 2:]
-                    overflowing_words = [vocab.get(int(x), "") for x in ovf]
-                    overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))
-                    hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
-
-                token_count = len(remade_tokens)
-                remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
-                remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end]
-                cache[tuple_tokens] = (remade_tokens, fixes, multipliers)
-
-            multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))
-            multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]
-
-            remade_batch_tokens.append(remade_tokens)
-            hijack_fixes.append(fixes)
-            batch_multipliers.append(multipliers)
-        return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
-
-    def forward(self, text):
-        use_old = opts.use_old_emphasis_implementation
-        if use_old:
-            batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text)
-        else:
-            batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text(text)
-
-        self.hijack.comments += hijack_comments
-
-        if len(used_custom_terms) > 0:
-            self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
-
-        if use_old:
-            self.hijack.fixes = hijack_fixes
-            return self.process_tokens(remade_batch_tokens, batch_multipliers)
-
-        z = None
-        i = 0
-        while max(map(len, remade_batch_tokens)) != 0:
-            rem_tokens = [x[75:] for x in remade_batch_tokens]
-            rem_multipliers = [x[75:] for x in batch_multipliers]
-
-            self.hijack.fixes = []
-            for unfiltered in hijack_fixes:
-                fixes = []
-                for fix in unfiltered:
-                    if fix[0] == i:
-                        fixes.append(fix[1])
-                self.hijack.fixes.append(fixes)
-
-            tokens = []
-            multipliers = []
-            for j in range(len(remade_batch_tokens)):
-                if len(remade_batch_tokens[j]) > 0:
-                    tokens.append(remade_batch_tokens[j][:75])
-                    multipliers.append(batch_multipliers[j][:75])
-                else:
-                    tokens.append([self.id_end] * 75)
-                    multipliers.append([1.0] * 75)
-
-            z1 = self.process_tokens(tokens, multipliers)
-            z = z1 if z is None else torch.cat((z, z1), axis=-2)
-
-            remade_batch_tokens = rem_tokens
-            batch_multipliers = rem_multipliers
-            i += 1
+        batch_chunks, token_count = self.process_texts(texts)
 
-        return z
+        used_embeddings = {}
+        chunk_count = max([len(x) for x in batch_chunks])
 
-    def process_tokens(self, remade_batch_tokens, batch_multipliers):
-        if not opts.use_old_emphasis_implementation:
-            remade_batch_tokens = [[self.id_start] + x[:75] + [self.id_end] for x in remade_batch_tokens]
-            batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers]
+        zs = []
+        for i in range(chunk_count):
+            batch_chunk = [chunks[i] if i < len(chunks) else self.empty_chunk() for chunks in batch_chunks]
+
+            tokens = [x.tokens for x in batch_chunk]
+            multipliers = [x.multipliers for x in batch_chunk]
+            self.hijack.fixes = [x.fixes for x in batch_chunk]
 
+            for fixes in self.hijack.fixes:
+                for position, embedding in fixes:
+                    used_embeddings[embedding.name] = embedding
+
+            z = self.process_tokens(tokens, multipliers)
+            zs.append(z)
+
+        if len(used_embeddings) > 0:
+            embeddings_list = ", ".join([f'{name} [{embedding.checksum()}]' for name, embedding in used_embeddings.items()])
+            self.hijack.comments.append(f"Used embeddings: {embeddings_list}")
+
+        return torch.hstack(zs)
+
+    def process_tokens(self, remade_batch_tokens, batch_multipliers):
+        """
+        sends one single prompt chunk to be encoded by transformers neural network.
+        remade_batch_tokens is a batch of tokens - a list, where every element is a list of tokens; usually
+        there are exactly 77 tokens in the list. batch_multipliers is the same but for multipliers instead of tokens.
+        Multipliers are used to give more or less weight to the outputs of transformers network. Each multiplier
+        corresponds to one token.
+        """
         tokens = torch.asarray(remade_batch_tokens).to(devices.device)
 
+        # this is for SD2: SD1 uses the same token for padding and end of text, while SD2 uses different ones.
         if self.id_end != self.id_pad:
             for batch_pos in range(len(remade_batch_tokens)):
                 index = remade_batch_tokens[batch_pos].index(self.id_end)
@@ -239,8 +234,7 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         z = self.encode_with_transformers(tokens)
 
         # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
-        batch_multipliers_of_same_length = [x + [1.0] * (75 - len(x)) for x in batch_multipliers]
-        batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(devices.device)
+        batch_multipliers = torch.asarray(batch_multipliers).to(devices.device)
         original_mean = z.mean()
         z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
         new_mean = z.mean()
diff --git a/modules/sd_hijack_clip_old.py b/modules/sd_hijack_clip_old.py
new file mode 100644
index 00000000..6d9fbbe6
--- /dev/null
+++ b/modules/sd_hijack_clip_old.py
@@ -0,0 +1,81 @@
+from modules import sd_hijack_clip
+from modules import shared
+
+
+def process_text_old(self: sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase, texts):
+    id_start = self.id_start
+    id_end = self.id_end
+    maxlen = self.wrapped.max_length  # you get to stay at 77
+    used_custom_terms = []
+    remade_batch_tokens = []
+    hijack_comments = []
+    hijack_fixes = []
+    token_count = 0
+
+    cache = {}
+    batch_tokens = self.tokenize(texts)
+    batch_multipliers = []
+    for tokens in batch_tokens:
+        tuple_tokens = tuple(tokens)
+
+        if tuple_tokens in cache:
+            remade_tokens, fixes, multipliers = cache[tuple_tokens]
+        else:
+            fixes = []
+            remade_tokens = []
+            multipliers = []
+            mult = 1.0
+
+            i = 0
+            while i < len(tokens):
+                token = tokens[i]
+
+                embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
+
+                mult_change = self.token_mults.get(token) if shared.opts.enable_emphasis else None
+                if mult_change is not None:
+                    mult *= mult_change
+                    i += 1
+                elif embedding is None:
+                    remade_tokens.append(token)
+                    multipliers.append(mult)
+                    i += 1
+                else:
+                    emb_len = int(embedding.vec.shape[0])
+                    fixes.append((len(remade_tokens), embedding))
+                    remade_tokens += [0] * emb_len
+                    multipliers += [mult] * emb_len
+                    used_custom_terms.append((embedding.name, embedding.checksum()))
+                    i += embedding_length_in_tokens
+
+            if len(remade_tokens) > maxlen - 2:
+                vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
+                ovf = remade_tokens[maxlen - 2:]
+                overflowing_words = [vocab.get(int(x), "") for x in ovf]
+                overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))
+                hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
+
+            token_count = len(remade_tokens)
+            remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
+            remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end]
+            cache[tuple_tokens] = (remade_tokens, fixes, multipliers)
+
+        multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))
+        multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]
+
+        remade_batch_tokens.append(remade_tokens)
+        hijack_fixes.append(fixes)
+        batch_multipliers.append(multipliers)
+    return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
+
+
+def forward_old(self: sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase, texts):
+    batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = process_text_old(self, texts)
+
+    self.hijack.comments += hijack_comments
+
+    if len(used_custom_terms) > 0:
+        self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
+
+    self.hijack.fixes = hijack_fixes
+    return self.process_tokens(remade_batch_tokens, batch_multipliers)
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index f9f5e8cd..45882ed6 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -79,7 +79,6 @@ class EmbeddingDatabase:
 
         self.word_embeddings[embedding.name] = embedding
 
-        # TODO changing between clip and open clip changes tokenization, which will cause embeddings to stop working
         ids = model.cond_stage_model.tokenize([embedding.name])[0]
 
         first_id = ids[0]
diff --git a/modules/ui.py b/modules/ui.py
index b79d24ee..5d2f5bad 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -368,7 +368,7 @@ def update_token_counter(text, steps):
 
     flat_prompts = reduce(lambda list1, list2: list1+list2, prompt_schedules)
     prompts = [prompt_text for step, prompt_text in flat_prompts]
-    tokens, token_count, max_length = max([model_hijack.tokenize(prompt) for prompt in prompts], key=lambda args: args[1])
+    token_count, max_length = max([model_hijack.get_prompt_lengths(prompt) for prompt in prompts], key=lambda args: args[0])
     style_class = ' class="red"' if (token_count > max_length) else ""
     return f"<span {style_class}>{token_count}/{max_length}</span>"
 
-- 
cgit v1.2.3