diff options
author | Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> | 2023-10-28 07:24:26 +0000 |
---|---|---|
committer | Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> | 2023-10-28 07:24:26 +0000 |
commit | d4d3134f6d2d232c7bcfa80900a362921e644976 (patch) | |
tree | a592066be90aecdf86a76270cca78001cce5d3eb /modules/sd_models.py | |
parent | 0beb131c7ffae6f756a6339206da311232a36970 (diff) | |
download | stable-diffusion-webui-gfx803-d4d3134f6d2d232c7bcfa80900a362921e644976.tar.gz stable-diffusion-webui-gfx803-d4d3134f6d2d232c7bcfa80900a362921e644976.tar.bz2 stable-diffusion-webui-gfx803-d4d3134f6d2d232c7bcfa80900a362921e644976.zip |
ManualCast for 10/16 series gpu
Diffstat (limited to 'modules/sd_models.py')
-rw-r--r-- | modules/sd_models.py | 21 |
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ccb6afd2..31bcb913 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -403,23 +403,26 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         if enable_fp8:
devices.fp8 = True
+ if model.is_sdxl:
+ cond_stage = model.conditioner
+ else:
+ cond_stage = model.cond_stage_model
+
+ for module in cond_stage.modules():
+ if isinstance(module, torch.nn.Linear):
+ module.to(torch.float8_e4m3fn)
+
if devices.device == devices.cpu:
for module in model.model.diffusion_model.modules():
if isinstance(module, torch.nn.Conv2d):
module.to(torch.float8_e4m3fn)
elif isinstance(module, torch.nn.Linear):
module.to(torch.float8_e4m3fn)
- timer.record("apply fp8 unet for cpu")
else:
- if model.is_sdxl:
- cond_stage = model.conditioner
- else:
- cond_stage = model.cond_stage_model
- for module in cond_stage.modules():
- if isinstance(module, torch.nn.Linear):
- module.to(torch.float8_e4m3fn)
model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
- timer.record("apply fp8 unet")
+ timer.record("apply fp8")
+ else:
+ devices.fp8 = False
devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
|