diff options
author | AUTOMATIC <16777216c@gmail.com> | 2022-10-19 05:42:22 +0000 |
---|---|---|
committer | AUTOMATIC <16777216c@gmail.com> | 2022-10-19 05:42:22 +0000 |
commit | 10aca1ca3e81e69e08f556a500c3dc603451429b (patch) | |
tree | a65469939a71c7ab16b50af27c9eb14e8edcd626 | |
parent | c1093b8051606f0ac90506b7114c4b55d0447c70 (diff) | |
download | stable-diffusion-webui-gfx803-10aca1ca3e81e69e08f556a500c3dc603451429b.tar.gz stable-diffusion-webui-gfx803-10aca1ca3e81e69e08f556a500c3dc603451429b.tar.bz2 stable-diffusion-webui-gfx803-10aca1ca3e81e69e08f556a500c3dc603451429b.zip |
more careful loading of model weights (eliminates some issues with checkpoints that have weird cond_stage_model layer names)
-rw-r--r-- | modules/sd_models.py | 28 |
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py index 3aa21ec1..7ad6d474 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -122,11 +122,33 @@ def select_checkpoint(): return checkpoint_info
+chckpoint_dict_replacements = {
+ 'cond_stage_model.transformer.embeddings.': 'cond_stage_model.transformer.text_model.embeddings.',
+ 'cond_stage_model.transformer.encoder.': 'cond_stage_model.transformer.text_model.encoder.',
+ 'cond_stage_model.transformer.final_layer_norm.': 'cond_stage_model.transformer.text_model.final_layer_norm.',
+}
+
+
+def transform_checkpoint_dict_key(k):
+ for text, replacement in chckpoint_dict_replacements.items():
+ if k.startswith(text):
+ k = replacement + k[len(text):]
+
+ return k
+
+
def get_state_dict_from_checkpoint(pl_sd):
    """Extract the model state dict from a loaded checkpoint.

    The checkpoint may be a bare state dict or may wrap it under a
    "state_dict" key. Every key is passed through
    transform_checkpoint_dict_key; entries whose transformed key is None
    are dropped from the result.
    """
    # Unwrap the nested state dict when present; otherwise use pl_sd itself.
    state_dict = pl_sd.get("state_dict", pl_sd)

    transformed = {}
    for key, tensor in state_dict.items():
        mapped_key = transform_checkpoint_dict_key(key)
        if mapped_key is not None:
            transformed[mapped_key] = tensor

    return transformed
def load_model_weights(model, checkpoint_info):
@@ -141,7 +163,7 @@ def load_model_weights(model, checkpoint_info): print(f"Global Step: {pl_sd['global_step']}")
sd = get_state_dict_from_checkpoint(pl_sd)
- model.load_state_dict(sd, strict=False)
+ missing, extra = model.load_state_dict(sd, strict=False)
if shared.cmd_opts.opt_channelslast:
model.to(memory_format=torch.channels_last)
|