From 5681bf801664aa09fa02ab8b4e73f780d9563440 Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Thu, 31 Aug 2023 14:57:16 -0400 Subject: More accurate check for enabling cuDNN benchmark on 16XX cards --- modules/devices.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index c01f0602..63c38eff 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -60,7 +60,8 @@ def enable_tf32(): # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407 - if any(torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())): + device_id = (int(shared.cmd_opts.device_id) if shared.cmd_opts.device_id.isdigit() else 0) or torch.cuda.current_device() + if torch.cuda.get_device_capability(device_id) == (7, 5) and torch.cuda.get_device_name(device_id).startswith("NVIDIA GeForce GTX 16"): torch.backends.cudnn.benchmark = True torch.backends.cuda.matmul.allow_tf32 = True -- cgit v1.2.3 From 46375f059276cb2d4d1e47bf65f984c6466dc2a0 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 9 Sep 2023 09:39:37 +0300 Subject: fix for crash when running #12924 without --device-id --- modules/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 63c38eff..1d4eb563 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -60,7 +60,7 @@ def enable_tf32(): # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407 - device_id = (int(shared.cmd_opts.device_id) if shared.cmd_opts.device_id.isdigit() else 0) or torch.cuda.current_device() + device_id = (int(shared.cmd_opts.device_id) if shared.cmd_opts.device_id is not None and shared.cmd_opts.device_id.isdigit() else 0) or torch.cuda.current_device() if torch.cuda.get_device_capability(device_id) == (7, 5) and torch.cuda.get_device_name(device_id).startswith("NVIDIA GeForce GTX 16"): torch.backends.cudnn.benchmark = True -- cgit v1.2.3 From 3cd6e1d0a0877e6f1ac931c8253e6eee09da3805 Mon Sep 17 00:00:00 2001 From: obsol <33932119+read-0nly@users.noreply.github.com> Date: Mon, 27 Nov 2023 19:21:43 -0500 Subject: Update devices.py fixes issue where "--use-cpu" all properly makes SD run on CPU but leaves ControlNet (and other extensions, I presume) pointed at GPU, causing a crash in ControlNet caused by a mismatch between devices between SD and CN https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/14097 --- modules/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index c01f0602..65efcf1e 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -38,7 +38,7 @@ def get_optimal_device(): def get_device_for(task): - if task in shared.cmd_opts.use_cpu: + if task in shared.cmd_opts.use_cpu or "all" in shared.cmd_opts.use_cpu: return cpu return get_optimal_device() -- cgit v1.2.3 From 8b40f475a31109cc6ecbdc0d14a0cee9e0303291 Mon Sep 17 00:00:00 2001 From: Nuullll Date: Fri, 10 Nov 2023 11:06:26 +0800 Subject: Initial IPEX support --- modules/devices.py | 11 +++++++++-- modules/xpu_specific.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 modules/xpu_specific.py (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 1d4eb563..be599736 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,7 +3,7 @@ import contextlib from functools import lru_cache import torch -from modules import errors, shared +from modules import errors, shared, xpu_specific if sys.platform == "darwin": from modules import mac_specific @@ -30,6 +30,9 @@ def get_optimal_device_name(): if has_mps(): return "mps" + if xpu_specific.has_ipex: + return xpu_specific.get_xpu_device_string() + return "cpu" @@ -100,11 +103,15 @@ def autocast(disable=False): if dtype == torch.float32 or shared.cmd_opts.precision == "full": return contextlib.nullcontext() + if xpu_specific.has_xpu: + return torch.autocast("xpu") + return torch.autocast("cuda") def without_autocast(disable=False): - return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext() + device_type = "xpu" if xpu_specific.has_xpu else "cuda" + return torch.autocast(device_type, enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext() class NansException(Exception): diff --git a/modules/xpu_specific.py b/modules/xpu_specific.py new file mode 100644 index 00000000..6417dd2d --- /dev/null +++ b/modules/xpu_specific.py @@ -0,0 +1,42 @@ +import contextlib +from modules import shared +from modules.sd_hijack_utils import CondFunc + +has_ipex = False +try: + import torch + import intel_extension_for_pytorch as ipex + has_ipex = True +except Exception: + pass + +def check_for_xpu(): + if not has_ipex: + return False + + return hasattr(torch, 'xpu') and torch.xpu.is_available() + +has_xpu = check_for_xpu() + +def get_xpu_device_string(): + if shared.cmd_opts.device_id is not None: + return f"xpu:{shared.cmd_opts.device_id}" + return "xpu" + +def return_null_context(*args, **kwargs): # pylint: disable=unused-argument + return contextlib.nullcontext() + +if has_xpu: + CondFunc('torch.Generator', + lambda orig_func, device=None: torch.xpu.Generator(device), + lambda orig_func, device=None: device is not None and device != torch.device("cpu") and device != "cpu") + + CondFunc('torch.nn.functional.layer_norm', + lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs: + orig_func(input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs), + lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs: + weight is not None and input.dtype != weight.data.dtype) + + CondFunc('torch.nn.modules.GroupNorm.forward', + lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)), + lambda orig_func, self, input: input.dtype != self.weight.data.dtype) -- cgit v1.2.3 From 7499148ad4dbd3444215c843d02453f68c459707 Mon Sep 17 00:00:00 2001 From: Nuullll Date: Sat, 2 Dec 2023 14:00:46 +0800 Subject: Disable ipex autocast due to its bad perf --- modules/cmd_args.py | 1 + modules/devices.py | 20 +++++++++++++------- modules/xpu_specific.py | 28 ++++++++++++++++++---------- webui-ipex-user.bat | 19 +++++++++++++++++++ 4 files changed, 51 insertions(+), 17 deletions(-) create mode 100644 webui-ipex-user.bat (limited to 'modules/devices.py') diff --git a/modules/cmd_args.py b/modules/cmd_args.py index a9fb9bfa..da93eb26 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -70,6 +70,7 @@ parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="pre parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization") parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI") parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower) +parser.add_argument("--use-ipex", action="store_true", help="use Intel XPU as torch device") parser.add_argument("--disable-model-loading-ram-optimization", action='store_true', help="disable an optimization that reduces RAM use when loading a model") parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) diff --git a/modules/devices.py b/modules/devices.py index be599736..37ecca78 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,11 +3,18 @@ import contextlib from functools import lru_cache import torch -from modules import errors, shared, xpu_specific +from modules import errors, shared if sys.platform == "darwin": from modules import mac_specific +if shared.cmd_opts.use_ipex: + from modules import xpu_specific + + +def has_xpu() -> bool: + return shared.cmd_opts.use_ipex and xpu_specific.has_xpu + def has_mps() -> bool: if sys.platform != "darwin": @@ -30,7 +37,7 @@ def get_optimal_device_name(): if has_mps(): return "mps" - if xpu_specific.has_ipex: + if has_xpu(): return xpu_specific.get_xpu_device_string() return "cpu" @@ -57,6 +64,9 @@ def torch_gc(): if has_mps(): mac_specific.torch_mps_gc() + if has_xpu(): + xpu_specific.torch_xpu_gc() + def enable_tf32(): if torch.cuda.is_available(): @@ -103,15 +113,11 @@ def autocast(disable=False): if dtype == torch.float32 or shared.cmd_opts.precision == "full": return contextlib.nullcontext() - if xpu_specific.has_xpu: - return torch.autocast("xpu") - return torch.autocast("cuda") def without_autocast(disable=False): - device_type = "xpu" if xpu_specific.has_xpu else "cuda" - return torch.autocast(device_type, enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext() + return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext() class NansException(Exception): diff --git a/modules/xpu_specific.py b/modules/xpu_specific.py index 6417dd2d..2df68665 100644 --- a/modules/xpu_specific.py +++ b/modules/xpu_specific.py @@ -1,4 +1,3 @@ -import contextlib from modules import shared from modules.sd_hijack_utils import CondFunc @@ -10,33 +9,42 @@ try: except Exception: pass -def check_for_xpu(): - if not has_ipex: - return False - return hasattr(torch, 'xpu') and torch.xpu.is_available() +def check_for_xpu(): + return has_ipex and hasattr(torch, 'xpu') and torch.xpu.is_available() -has_xpu = check_for_xpu() def get_xpu_device_string(): if shared.cmd_opts.device_id is not None: return f"xpu:{shared.cmd_opts.device_id}" return "xpu" -def return_null_context(*args, **kwargs): # pylint: disable=unused-argument - return contextlib.nullcontext() + +def torch_xpu_gc(): + with torch.xpu.device(get_xpu_device_string()): + torch.xpu.empty_cache() + + +has_xpu = check_for_xpu() if has_xpu: + # W/A for https://github.com/intel/intel-extension-for-pytorch/issues/452: torch.Generator API doesn't support XPU device CondFunc('torch.Generator', lambda orig_func, device=None: torch.xpu.Generator(device), - lambda orig_func, device=None: device is not None and device != torch.device("cpu") and device != "cpu") + lambda orig_func, device=None: device is not None and device.type == "xpu") + # W/A for some OPs that could not handle different input dtypes CondFunc('torch.nn.functional.layer_norm', lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs: orig_func(input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs), lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs: weight is not None and input.dtype != weight.data.dtype) - CondFunc('torch.nn.modules.GroupNorm.forward', lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)), lambda orig_func, self, input: input.dtype != self.weight.data.dtype) + CondFunc('torch.nn.modules.linear.Linear.forward', + lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)), + lambda orig_func, self, input: input.dtype != self.weight.data.dtype) + CondFunc('torch.nn.modules.conv.Conv2d.forward', + lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)), + lambda orig_func, self, input: input.dtype != self.weight.data.dtype) diff --git a/webui-ipex-user.bat b/webui-ipex-user.bat new file mode 100644 index 00000000..ab25a040 --- /dev/null +++ b/webui-ipex-user.bat @@ -0,0 +1,19 @@ +@echo off + +set PYTHON= +@REM The "Nuullll/intel-extension-for-pytorch" wheels were built from IPEX source for Intel Arc GPU: https://github.com/intel/intel-extension-for-pytorch/tree/xpu-main +@REM This is NOT an Intel official release so please use it at your own risk!! +@REM See https://github.com/Nuullll/intel-extension-for-pytorch/releases/tag/v2.0.110%2Bxpu-master%2Bdll-bundle for details. +@REM +@REM Strengths (over official IPEX 2.0.110 windows release): +@REM - AOT build (for Arc GPU only) to eliminate JIT compilation overhead: https://github.com/intel/intel-extension-for-pytorch/issues/399 +@REM - Bundles minimal oneAPI 2023.2 dependencies into the python wheels, so users don't need to install oneAPI for the whole system. +@REM - Provides a compatible torchvision wheel: https://github.com/intel/intel-extension-for-pytorch/issues/465 +@REM Limitation: +@REM - Only works for python 3.10 +set "TORCH_COMMAND=pip install https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/torch-2.0.0a0+gite9ebda2-cp310-cp310-win_amd64.whl https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/torchvision-0.15.2a0+fa99a53-cp310-cp310-win_amd64.whl https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/intel_extension_for_pytorch-2.0.110+gitc6ea20b-cp310-cp310-win_amd64.whl" +set GIT= +set VENV_DIR= +set "COMMANDLINE_ARGS=--use-ipex --skip-torch-cuda-test --skip-version-check --opt-sdp-attention" + +call webui.bat -- cgit v1.2.3