diff options
author | Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> | 2023-10-28 07:24:26 +0000 |
---|---|---|
committer | Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> | 2023-10-28 07:24:26 +0000 |
commit | d4d3134f6d2d232c7bcfa80900a362921e644976 (patch) | |
tree | a592066be90aecdf86a76270cca78001cce5d3eb /modules/sd_models.py | |
parent | 0beb131c7ffae6f756a6339206da311232a36970 (diff) | |
download | stable-diffusion-webui-gfx803-d4d3134f6d2d232c7bcfa80900a362921e644976.tar.gz stable-diffusion-webui-gfx803-d4d3134f6d2d232c7bcfa80900a362921e644976.tar.bz2 stable-diffusion-webui-gfx803-d4d3134f6d2d232c7bcfa80900a362921e644976.zip |
ManualCast for 10/16 series gpu
Diffstat (limited to 'modules/sd_models.py')
-rw-r--r-- | modules/sd_models.py | 21 |
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ccb6afd2..31bcb913 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -403,23 +403,26 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         if enable_fp8:
devices.fp8 = True
+ if model.is_sdxl:
+ cond_stage = model.conditioner
+ else:
+ cond_stage = model.cond_stage_model
+
+ for module in cond_stage.modules():
+ if isinstance(module, torch.nn.Linear):
+ module.to(torch.float8_e4m3fn)
+
if devices.device == devices.cpu:
for module in model.model.diffusion_model.modules():
if isinstance(module, torch.nn.Conv2d):
module.to(torch.float8_e4m3fn)
elif isinstance(module, torch.nn.Linear):
module.to(torch.float8_e4m3fn)
- timer.record("apply fp8 unet for cpu")
else:
- if model.is_sdxl:
- cond_stage = model.conditioner
- else:
- cond_stage = model.cond_stage_model
- for module in cond_stage.modules():
- if isinstance(module, torch.nn.Linear):
- module.to(torch.float8_e4m3fn)
model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
- timer.record("apply fp8 unet")
+ timer.record("apply fp8")
+ else:
+ devices.fp8 = False
devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
|