From 57eb54b838faa383c10079e1bb5471b7bee6a695 Mon Sep 17 00:00:00 2001
From: Extraltodeus
Date: Sat, 22 Oct 2022 00:11:07 +0200
Subject: implement CUDA device selection by ID

---
 modules/devices.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index eb422583..8a159282 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -1,7 +1,6 @@
+import sys, os, shlex
 import contextlib
-
 import torch
-
 from modules import errors
 
 # has_mps is only available in nightly pytorch (for now), `getattr` for compatibility
@@ -9,10 +8,26 @@ has_mps = getattr(torch, 'has_mps', False)
 
 cpu = torch.device("cpu")
 
+def extract_device_id(args, name):
+    for x in range(len(args)):
+        if name in args[x]: return args[x+1]
+    return None
 
 def get_optimal_device():
     if torch.cuda.is_available():
-        return torch.device("cuda")
+        # CUDA device selection support:
+        if "shared" not in sys.modules:
+            commandline_args = os.environ.get('COMMANDLINE_ARGS', "") #re-parse the commandline arguments because using the shared.py module creates an import loop.
+            sys.argv += shlex.split(commandline_args)
+            device_id = extract_device_id(sys.argv, '--device-id')
+        else:
+            device_id = shared.cmd_opts.device_id
+
+        if device_id is not None:
+            cuda_device = f"cuda:{device_id}"
+            return torch.device(cuda_device)
+        else:
+            return torch.device("cuda")
 
     if has_mps:
         return torch.device("mps")
-- 
cgit v1.2.3
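
The patch above sidesteps argparse: `extract_device_id` scans raw `sys.argv` for any token containing the flag name and returns the token that follows, because importing `shared` (which owns the parsed options) from `devices.py` would create an import loop. A minimal standalone sketch of that lookup, under a hypothetical command line; the substring match and the IndexError risk when the flag is the last token are both faithful to the patch:

    import torch

    def extract_device_id(args, name):
        # Substring match, as in the patch: the token *after* the first
        # argument containing `name` is treated as the value.
        for x in range(len(args)):
            if name in args[x]:
                return args[x + 1]
        return None

    argv = ["launch.py", "--device-id", "1"]  # hypothetical arguments
    device_id = extract_device_id(argv, "--device-id")
    device = torch.device(f"cuda:{device_id}" if device_id is not None else "cuda")
    print(device)  # cuda:1
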
From 50b5504401e50b6c94eba41b37fe212b2f27b792 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 22 Oct 2022 14:04:14 +0300
Subject: remove parsing command line from devices.py

---
 modules/devices.py | 14 +++++---------
 modules/lowvram.py |  9 ++++-----
 2 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 8a159282..dc1f3cdd 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -15,14 +15,10 @@ def extract_device_id(args, name):
 
 def get_optimal_device():
     if torch.cuda.is_available():
-        # CUDA device selection support:
-        if "shared" not in sys.modules:
-            commandline_args = os.environ.get('COMMANDLINE_ARGS', "") #re-parse the commandline arguments because using the shared.py module creates an import loop.
-            sys.argv += shlex.split(commandline_args)
-            device_id = extract_device_id(sys.argv, '--device-id')
-        else:
-            device_id = shared.cmd_opts.device_id
-
+        from modules import shared
+
+        device_id = shared.cmd_opts.device_id
+
         if device_id is not None:
             cuda_device = f"cuda:{device_id}"
             return torch.device(cuda_device)
@@ -49,7 +45,7 @@ def enable_tf32():
 
 errors.run(enable_tf32, "Enabling TF32")
 
-device = device_interrogate = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device()
+device = device_interrogate = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = None
 dtype = torch.float16
 dtype_vae = torch.float16
 
diff --git a/modules/lowvram.py b/modules/lowvram.py
index 7eba1349..f327c3df 100644
--- a/modules/lowvram.py
+++ b/modules/lowvram.py
@@ -1,9 +1,8 @@
 import torch
-from modules.devices import get_optimal_device
+from modules import devices
 
 module_in_gpu = None
 cpu = torch.device("cpu")
-device = gpu = get_optimal_device()
 
 
 def send_everything_to_cpu():
@@ -33,7 +32,7 @@ def setup_for_low_vram(sd_model, use_medvram):
         if module_in_gpu is not None:
             module_in_gpu.to(cpu)
 
-        module.to(gpu)
+        module.to(devices.device)
         module_in_gpu = module
 
     # see below for register_forward_pre_hook;
@@ -51,7 +50,7 @@ def setup_for_low_vram(sd_model, use_medvram):
     # send the model to GPU. Then put modules back. the modules will be in CPU.
     stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model
     sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = None, None, None
-    sd_model.to(device)
+    sd_model.to(devices.device)
     sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = stored
 
     # register hooks for those the first two models
@@ -70,7 +69,7 @@ def setup_for_low_vram(sd_model, use_medvram):
     # so that only one of them is in GPU at a time
    stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed
     diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None
-    sd_model.model.to(device)
+    sd_model.model.to(devices.device)
     diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored
 
     # install hooks for bits of third model
-- 
cgit v1.2.3
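
The fix above breaks the import loop properly: a function-level `from modules import shared` runs only when `get_optimal_device()` is first called, by which time `shared` has parsed the command line, and the seven `device_*` globals are left as `None` for `shared.py` to assign. A self-contained sketch of that deferred-import pattern; the `cmd_opts` stand-in here is hypothetical, not the webui object:

    import torch

    def get_optimal_device():
        # Deferred import pattern: resolve the options provider at call
        # time, not at module-import time, so a circular import between
        # the two modules never triggers.
        from argparse import Namespace
        cmd_opts = Namespace(device_id=None)  # stand-in for shared.cmd_opts

        if torch.cuda.is_available():
            if cmd_opts.device_id is not None:
                return torch.device(f"cuda:{cmd_opts.device_id}")
            return torch.device("cuda")
        if getattr(torch, 'has_mps', False):
            return torch.device("mps")
        return torch.device("cpu")

    print(get_optimal_device())
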
help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") -parser.add_argument("--use-cpu", nargs='+',choices=['all', 'sd', 'interrogate', 'gfpgan', 'bsrgan', 'esrgan', 'scunet', 'codeformer'], help="use CPU as torch device for specified modules", default=[], type=str.lower) +parser.add_argument("--use-cpu", nargs='+',choices=['all', 'sd', 'interrogate', 'gfpgan', 'swinir', 'esrgan', 'scunet', 'codeformer'], help="use CPU as torch device for specified modules", default=[], type=str.lower) parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False) @@ -96,8 +96,8 @@ restricted_opts = [ "outdir_save", ] -devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_bsrgan, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \ -(devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'bsrgan', 'esrgan', 'scunet', 'codeformer']) +devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_swinir, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \ +(devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'swinir', 'esrgan', 'scunet', 'codeformer']) device = devices.device weight_load_location = None if cmd_opts.lowram else "cpu" diff --git a/modules/swinir_model.py b/modules/swinir_model.py index baa02e3d..facd262d 100644 --- a/modules/swinir_model.py +++ b/modules/swinir_model.py @@ -7,8 +7,8 @@ from PIL import Image from basicsr.utils.download_util import load_file_from_url from tqdm import tqdm -from modules import modelloader -from modules.shared import cmd_opts, opts, device +from modules import modelloader, devices +from modules.shared import cmd_opts, opts from modules.swinir_model_arch import SwinIR as net from modules.swinir_model_arch_v2 import Swin2SR as net2 from modules.upscaler import Upscaler, UpscalerData @@ -42,7 +42,7 @@ class UpscalerSwinIR(Upscaler): model = self.load_model(model_file) if model is None: return img - model = model.to(device) + model = model.to(devices.device_swinir) img = upscale(img, model) try: torch.cuda.empty_cache() @@ -111,7 +111,7 @@ def upscale( img = img[:, :, ::-1] img = np.moveaxis(img, 2, 0) / 255 img = torch.from_numpy(img).float() - img = img.unsqueeze(0).to(device) + img = img.unsqueeze(0).to(devices.device_swinir) with torch.no_grad(), precision_scope("cuda"): _, _, h_old, w_old = img.size() h_pad = (h_old // window_size + 1) * window_size - h_old @@ -139,8 +139,8 @@ def inference(img, model, tile, tile_overlap, window_size, scale): stride = tile - tile_overlap h_idx_list = list(range(0, h - tile, stride)) + [h - tile] w_idx_list = list(range(0, w - tile, stride)) + [w - 
From faed465a0b1a7d19669568738c93e04907c10415 Mon Sep 17 00:00:00 2001
From: brkirch
Date: Tue, 25 Oct 2022 02:01:57 -0400
Subject: MPS Upscalers Fix

Get ESRGAN, SCUNet, and SwinIR working correctly on MPS by ensuring memory
is contiguous for tensor views before sending to MPS device.
---
 modules/devices.py      | 4 ++++
 modules/esrgan_model.py | 2 +-
 modules/scunet_model.py | 3 +--
 modules/swinir_model.py | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 033a42d5..7511e1dc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -81,3 +81,7 @@ def autocast(disable=False):
         return contextlib.nullcontext()
 
     return torch.autocast("cuda")
+
+# MPS workaround for https://github.com/pytorch/pytorch/issues/79383
+def mps_contiguous(input_tensor, device): return input_tensor.contiguous() if device.type == 'mps' else input_tensor
+def mps_contiguous_to(input_tensor, device): return mps_contiguous(input_tensor, device).to(device)
diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py
index a49e2258..a13cf6ac 100644
--- a/modules/esrgan_model.py
+++ b/modules/esrgan_model.py
@@ -190,7 +190,7 @@ def upscale_without_tiling(model, img):
     img = img[:, :, ::-1]
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
-    img = img.unsqueeze(0).to(devices.device_esrgan)
+    img = devices.mps_contiguous_to(img.unsqueeze(0), devices.device_esrgan)
     with torch.no_grad():
         output = model(img)
     output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
diff --git a/modules/scunet_model.py b/modules/scunet_model.py
index 36a996bf..59532274 100644
--- a/modules/scunet_model.py
+++ b/modules/scunet_model.py
@@ -54,9 +54,8 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
         img = img[:, :, ::-1]
         img = np.moveaxis(img, 2, 0) / 255
         img = torch.from_numpy(img).float()
-        img = img.unsqueeze(0).to(device)
+        img = devices.mps_contiguous_to(img.unsqueeze(0), device)
 
-        img = img.to(device)
         with torch.no_grad():
             output = model(img)
         output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
diff --git a/modules/swinir_model.py b/modules/swinir_model.py
index facd262d..4253b66d 100644
--- a/modules/swinir_model.py
+++ b/modules/swinir_model.py
@@ -111,7 +111,7 @@ def upscale(
     img = img[:, :, ::-1]
     img = np.moveaxis(img, 2, 0) / 255
     img = torch.from_numpy(img).float()
-    img = img.unsqueeze(0).to(devices.device_swinir)
+    img = devices.mps_contiguous_to(img.unsqueeze(0), devices.device_swinir)
     with torch.no_grad(), precision_scope("cuda"):
         _, _, h_old, w_old = img.size()
         h_pad = (h_old // window_size + 1) * window_size - h_old
-- 
cgit v1.2.3
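
The two one-liner helpers added to `devices.py` address pytorch/pytorch#79383, where copying a non-contiguous tensor view to the MPS device produced incorrect results: views are made contiguous before the move, a no-op on every other device type. A short demonstration, safe to run with or without an MPS build; the sample tensor is hypothetical:

    import torch

    def mps_contiguous(input_tensor, device):
        # Only MPS needs the defensive copy; other devices pass the view through.
        return input_tensor.contiguous() if device.type == 'mps' else input_tensor

    def mps_contiguous_to(input_tensor, device):
        return mps_contiguous(input_tensor, device).to(device)

    mps_backend = getattr(torch.backends, 'mps', None)
    device = torch.device('mps') if mps_backend is not None and mps_backend.is_available() else torch.device('cpu')

    img = torch.rand(64, 64, 3).permute(2, 0, 1).unsqueeze(0)  # HWC -> NCHW view
    print(img.is_contiguous())  # False: permute returns a view, not a copy
    print(mps_contiguous_to(img, device).device)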