From 57eb54b838faa383c10079e1bb5471b7bee6a695 Mon Sep 17 00:00:00 2001 From: Extraltodeus Date: Sat, 22 Oct 2022 00:11:07 +0200 Subject: implement CUDA device selection by ID --- modules/devices.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index eb422583..8a159282 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -1,7 +1,6 @@ +import sys, os, shlex import contextlib - import torch - from modules import errors # has_mps is only available in nightly pytorch (for now), `getattr` for compatibility @@ -9,10 +8,26 @@ has_mps = getattr(torch, 'has_mps', False) cpu = torch.device("cpu") +def extract_device_id(args, name): + for x in range(len(args)): + if name in args[x]: return args[x+1] + return None def get_optimal_device(): if torch.cuda.is_available(): - return torch.device("cuda") + # CUDA device selection support: + if "shared" not in sys.modules: + commandline_args = os.environ.get('COMMANDLINE_ARGS', "") #re-parse the commandline arguments because using the shared.py module creates an import loop. + sys.argv += shlex.split(commandline_args) + device_id = extract_device_id(sys.argv, '--device-id') + else: + device_id = shared.cmd_opts.device_id + + if device_id is not None: + cuda_device = f"cuda:{device_id}" + return torch.device(cuda_device) + else: + return torch.device("cuda") if has_mps: return torch.device("mps") -- cgit v1.2.3 From 50b5504401e50b6c94eba41b37fe212b2f27b792 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 22 Oct 2022 14:04:14 +0300 Subject: remove parsing command line from devices.py --- modules/devices.py | 14 +++++--------- modules/lowvram.py | 9 ++++----- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 8a159282..dc1f3cdd 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -15,14 +15,10 @@ def extract_device_id(args, name): def get_optimal_device(): if torch.cuda.is_available(): - # CUDA device selection support: - if "shared" not in sys.modules: - commandline_args = os.environ.get('COMMANDLINE_ARGS', "") #re-parse the commandline arguments because using the shared.py module creates an import loop. - sys.argv += shlex.split(commandline_args) - device_id = extract_device_id(sys.argv, '--device-id') - else: - device_id = shared.cmd_opts.device_id - + from modules import shared + + device_id = shared.cmd_opts.device_id + if device_id is not None: cuda_device = f"cuda:{device_id}" return torch.device(cuda_device) @@ -49,7 +45,7 @@ def enable_tf32(): errors.run(enable_tf32, "Enabling TF32") -device = device_interrogate = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device() +device = device_interrogate = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = None dtype = torch.float16 dtype_vae = torch.float16 diff --git a/modules/lowvram.py b/modules/lowvram.py index 7eba1349..f327c3df 100644 --- a/modules/lowvram.py +++ b/modules/lowvram.py @@ -1,9 +1,8 @@ import torch -from modules.devices import get_optimal_device +from modules import devices module_in_gpu = None cpu = torch.device("cpu") -device = gpu = get_optimal_device() def send_everything_to_cpu(): @@ -33,7 +32,7 @@ def setup_for_low_vram(sd_model, use_medvram): if module_in_gpu is not None: module_in_gpu.to(cpu) - module.to(gpu) + module.to(devices.device) module_in_gpu = module # see below for register_forward_pre_hook; @@ -51,7 +50,7 @@ def setup_for_low_vram(sd_model, use_medvram): # send the model to GPU. Then put modules back. the modules will be in CPU. stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = None, None, None - sd_model.to(device) + sd_model.to(devices.device) sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = stored # register hooks for those the first two models @@ -70,7 +69,7 @@ def setup_for_low_vram(sd_model, use_medvram): # so that only one of them is in GPU at a time stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None - sd_model.model.to(device) + sd_model.model.to(devices.device) diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored # install hooks for bits of third model -- cgit v1.2.3