author | missionfloyd <missionfloyd@users.noreply.github.com> | 2023-05-26 00:53:33 +0000
committer | GitHub <noreply@github.com> | 2023-05-26 00:53:33 +0000
commit | 6645f23c4c715b1bc704c88a499b2f4224d7f1e6 (patch)
tree | 6aeb51e366254fe8993856a3db341690bb39dca5 /modules/devices.py
parent | 43bdaa2f0eda79c685792b06a2bd84c65806a48f (diff)
parent | a6e653be26cc05f4438145fa0082816e9fbbf5fc (diff)
Merge branch 'dev' into reorder-hotkeys
Diffstat (limited to 'modules/devices.py')
-rw-r--r-- | modules/devices.py | 20
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/modules/devices.py b/modules/devices.py
index c705a3cb..1ed6ffdc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -1,5 +1,7 @@
 import sys
 import contextlib
+from functools import lru_cache
+
 import torch

 from modules import errors
@@ -65,7 +67,7 @@ def enable_tf32():

         # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
         # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
-        if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
+        if any(torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())):
             torch.backends.cudnn.benchmark = True

         torch.backends.cuda.matmul.allow_tf32 = True
@@ -154,3 +156,19 @@ def test_for_nans(x, where):
         message += " Use --disable-nan-check commandline argument to disable this check."

     raise NansException(message)
+
+
+@lru_cache
+def first_time_calculation():
+    """
+    just do any calculation with pytorch layers - the first time this is done it allocates about 700MB of memory and
+    spends about 2.7 seconds doing that, at least with NVidia.
+    """
+
+    x = torch.zeros((1, 1)).to(device, dtype)
+    linear = torch.nn.Linear(1, 1).to(device, dtype)
+    linear(x)
+
+    x = torch.zeros((1, 1, 3, 3)).to(device, dtype)
+    conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype)
+    conv2d(x)
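The enable_tf32 change drops the square brackets so that any() consumes a generator expression instead of a fully built list: the generator lets any() short-circuit as soon as one device reports capability (7, 5), and it avoids materializing a throwaway list. A minimal sketch of the difference, using a hypothetical probe() stand-in for torch.cuda.get_device_capability (not part of the patch):

def probe(devid):
    print(f"querying device {devid}")
    return devid == 0

# List comprehension: every device is queried before any() sees a result.
any([probe(devid) for devid in range(4)])

# Generator expression: any() stops at the first True, so only device 0 is queried.
any(probe(devid) for devid in range(4))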
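The new first_time_calculation() leans on functools.lru_cache: applied to a zero-argument function, the decorator effectively makes it run-once, so the roughly 2.7-second, 700 MB warm-up is paid on the first call and every later call is an immediate cache hit. A minimal sketch of that run-once idiom; the slow_init name and the sleep are illustrative stand-ins, not part of the patch:

import time
from functools import lru_cache


@lru_cache
def slow_init():
    # Stand-in for the expensive PyTorch layer warm-up in the patch.
    time.sleep(1)
    return "ready"


start = time.perf_counter()
slow_init()  # first call pays the full cost
print(f"first call:  {time.perf_counter() - start:.2f}s")

start = time.perf_counter()
slow_init()  # cache hit: the body does not run again
print(f"second call: {time.perf_counter() - start:.4f}s")

Presumably the webui invokes first_time_calculation() once during startup so that the first real generation does not absorb the allocation delay.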