From cd6c55c1ab14fcab15329cde599cf79e8d555657 Mon Sep 17 00:00:00 2001
From: pepe10-gpu <pepe.dannyboy@gmail.com>
Date: Sun, 6 Nov 2022 17:05:51 -0800
Subject: 16xx card fix

cudnn
---
 modules/devices.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 7511e1dc..858bf399 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -39,10 +39,13 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cudnn.enabled = True
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
 
+
 errors.run(enable_tf32, "Enabling TF32")
 
 device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
-- 
cgit v1.2.3


From 29eff4a194d22f0f0e7a7a976d746a71a4193cf5 Mon Sep 17 00:00:00 2001
From: pepe10-gpu <pepe.dannyboy@gmail.com>
Date: Mon, 7 Nov 2022 18:06:48 -0800
Subject: terrible hack

---
 modules/devices.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 858bf399..4c63f465 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -39,8 +39,15 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
-        torch.backends.cudnn.benchmark = True
-        torch.backends.cudnn.enabled = True
+        #TODO: make this better; find a way to check if it is a turing card
+        turing = ["1630","1650","1660","Quadro RTX 3000","Quadro RTX 4000","Quadro RTX 4000","Quadro RTX 5000","Quadro RTX 5000","Quadro RTX 6000","Quadro RTX 6000","Quadro RTX 8000","Quadro RTX T400","Quadro RTX T400","Quadro RTX T600","Quadro RTX T1000","Quadro RTX T1000","2060","2070","2080","Titan RTX","Tesla T4","MX450","MX550"]
+        for devid in range(0,torch.cuda.device_count()):
+            for i in turing:
+                if i in torch.cuda.get_device_name(devid):
+                    shd = True
+        if shd:
+            torch.backends.cudnn.benchmark = True
+            torch.backends.cudnn.enabled = True
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
-- 
cgit v1.2.3


From 62e9fec3df8518da3a2c35fa090bb54946c856b2 Mon Sep 17 00:00:00 2001
From: pepe10-gpu <pepe.dannyboy@gmail.com>
Date: Tue, 8 Nov 2022 15:19:09 -0800
Subject: actual better fix

thanks C43H66N12O12S2
---
 modules/devices.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 4c63f465..058a5e00 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -39,12 +39,9 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
-        #TODO: make this better; find a way to check if it is a turing card
-        turing = ["1630","1650","1660","Quadro RTX 3000","Quadro RTX 4000","Quadro RTX 4000","Quadro RTX 5000","Quadro RTX 5000","Quadro RTX 6000","Quadro RTX 6000","Quadro RTX 8000","Quadro RTX T400","Quadro RTX T400","Quadro RTX T600","Quadro RTX T1000","Quadro RTX T1000","2060","2070","2080","Titan RTX","Tesla T4","MX450","MX550"]
         for devid in range(0,torch.cuda.device_count()):
-            for i in turing:
-                if i in torch.cuda.get_device_name(devid):
-                    shd = True
+            if torch.cuda.get_device_capability(devid) == (7, 5):
+                shd = True
         if shd:
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.enabled = True
-- 
cgit v1.2.3


From 0fddb4a1c06a6e2122add7eee3b001a6d473baee Mon Sep 17 00:00:00 2001
From: brkirch <brkirch@users.noreply.github.com>
Date: Wed, 30 Nov 2022 08:02:39 -0500
Subject: Rework MPS randn fix, add randn_like fix

torch.manual_seed() already sets a CPU generator, so there is no reason to create a CPU generator manually. torch.randn_like also needs a MPS fix for k-diffusion, but a torch hijack with randn_like already exists so it can also be used for that.
---
 modules/devices.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index f00079c6..046460fa 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -66,24 +66,15 @@ dtype_vae = torch.float16
 
 
 def randn(seed, shape):
-    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
-    if device.type == 'mps':
-        generator = torch.Generator(device=cpu)
-        generator.manual_seed(seed)
-        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
-        return noise
-
     torch.manual_seed(seed)
+    if device.type == 'mps':
+        return torch.randn(shape, device=cpu).to(device)
     return torch.randn(shape, device=device)
 
 
 def randn_without_seed(shape):
-    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
     if device.type == 'mps':
-        generator = torch.Generator(device=cpu)
-        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
-        return noise
-
+        return torch.randn(shape, device=cpu).to(device)
     return torch.randn(shape, device=device)
 
 
-- 
cgit v1.2.3


From 2651267e3af5886b8b6b1dc3023f2507f7079118 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 15:57:52 +0300
Subject: fix #4407 breaking UI entirely for card other than ones related to
 the PR

---
 modules/devices.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 1325569c..547ea46c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -53,12 +53,10 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
-        for devid in range(0,torch.cuda.device_count()):
-            if torch.cuda.get_device_capability(devid) == (7, 5):
-                shd = True
-        if shd:
+        if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.enabled = True
+
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
-- 
cgit v1.2.3


From 46b0d230e7c13e247eabb22e1103ce512e7ed6b1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 16:01:23 +0300
Subject: add comment for #4407 and remove seemingly unnecessary cudnn.enabled

---
 modules/devices.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 547ea46c..d6a76844 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -53,9 +53,11 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
+
+        # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
+        # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
         if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
             torch.backends.cudnn.benchmark = True
-            torch.backends.cudnn.enabled = True
 
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
-- 
cgit v1.2.3


From b6e5edd74657e3fd1fbd04f341b7a84625d4aa7a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 18:06:33 +0300
Subject: add built-in extension system add support for adding upscalers in
 extensions move LDSR, ScuNET and SwinIR to built-in extensions

---
 modules/devices.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index d6a76844..f8cffae1 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -44,6 +44,15 @@ def get_optimal_device():
     return cpu
 
 
+def get_device_for(task):
+    from modules import shared
+
+    if task in shared.cmd_opts.use_cpu:
+        return cpu
+
+    return get_optimal_device()
+
+
 def torch_gc():
     if torch.cuda.is_available():
         with torch.cuda.device(get_cuda_device_string()):
@@ -67,7 +76,7 @@ def enable_tf32():
 errors.run(enable_tf32, "Enabling TF32")
 
 cpu = torch.device("cpu")
-device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
+device = device_interrogate = device_gfpgan = device_esrgan = device_codeformer = None
 dtype = torch.float16
 dtype_vae = torch.float16
 
-- 
cgit v1.2.3