1 files changed, 18 insertions, 7 deletions
diff --git a/modules/devices.py b/modules/devices.py
index 6b36622c..655ca1d3 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -34,14 +34,18 @@ def get_cuda_device_string():
     return "cuda"
 
 
-def get_optimal_device():
+def get_optimal_device_name():
     if torch.cuda.is_available():
-        return torch.device(get_cuda_device_string())
+        return get_cuda_device_string()
 
     if has_mps():
-        return torch.device("mps")
+        return "mps"
+
+    return "cpu"
 
-    return cpu
+
+def get_optimal_device():
+    return torch.device(get_optimal_device_name())
 
 
 def get_device_for(task):
@@ -83,6 +87,14 @@ dtype_unet = torch.float16
 unet_needs_upcast = False
 
 
+def cond_cast_unet(input):
+    return input.to(dtype_unet) if unet_needs_upcast else input
+
+
+def cond_cast_float(input):
+    return input.float() if unet_needs_upcast else input
+
+
 def randn(seed, shape):
     torch.manual_seed(seed)
     if device.type == 'mps':
@@ -139,6 +151,8 @@ def test_for_nans(x, where):
     else:
         message = "A tensor with all NaNs was produced."
 
+    message += " Use --disable-nan-check commandline argument to disable this check."
+
     raise NansException(message)
 
 
@@ -193,6 +207,3 @@ if has_mps():
         cumsum_needs_bool_fix = not torch.BoolTensor([True,True]).to(device=torch.device("mps"), dtype=torch.int64).equal(torch.BoolTensor([True,False]).to(torch.device("mps")).cumsum(0))
         torch.cumsum = lambda input, *args, **kwargs: ( cumsum_fix(input, orig_cumsum, *args, **kwargs) )
         torch.Tensor.cumsum = lambda self, *args, **kwargs: ( cumsum_fix(self, orig_Tensor_cumsum, *args, **kwargs) )
-        orig_narrow = torch.narrow
-        torch.narrow = lambda *args, **kwargs: ( orig_narrow(*args, **kwargs).clone() )
-