From e472383acbb9e07dca311abe5fb16ee2675e410a Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Wed, 27 Dec 2023 11:04:33 +0200
Subject: Refactor esrgan_upscale to more generic upscale_with_model

---
 modules/upscaler_utils.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 modules/upscaler_utils.py

diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
new file mode 100644
index 00000000..8bdda51c
--- /dev/null
+++ b/modules/upscaler_utils.py
@@ -0,0 +1,66 @@
+import logging
+from typing import Callable
+
+import numpy as np
+import torch
+import tqdm
+from PIL import Image
+
+from modules import devices, images
+
+logger = logging.getLogger(__name__)
+
+
+def upscale_without_tiling(model, img: Image.Image):
+    img = np.array(img)
+    img = img[:, :, ::-1]
+    img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
+    img = torch.from_numpy(img).float()
+    img = img.unsqueeze(0).to(devices.device_esrgan)
+    with torch.no_grad():
+        output = model(img)
+    output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = 255. * np.moveaxis(output, 0, 2)
+    output = output.astype(np.uint8)
+    output = output[:, :, ::-1]
+    return Image.fromarray(output, 'RGB')
+
+
+def upscale_with_model(
+    model: Callable[[torch.Tensor], torch.Tensor],
+    img: Image.Image,
+    *,
+    tile_size: int,
+    tile_overlap: int = 0,
+    desc="tiled upscale",
+) -> Image.Image:
+    if tile_size <= 0:
+        logger.debug("Upscaling %s without tiling", img)
+        output = upscale_without_tiling(model, img)
+        logger.debug("=> %s", output)
+        return output
+
+    grid = images.split_grid(img, tile_size, tile_size, tile_overlap)
+    newtiles = []
+
+    with tqdm.tqdm(total=grid.tile_count, desc=desc) as p:
+        for y, h, row in grid.tiles:
+            newrow = []
+            for x, w, tile in row:
+                logger.debug("Tile (%d, %d) %s...", x, y, tile)
+                output = upscale_without_tiling(model, tile)
+                scale_factor = output.width // tile.width
+                logger.debug("=> %s (scale factor %s)", output, scale_factor)
+                newrow.append([x * scale_factor, w * scale_factor, output])
+                p.update(1)
+            newtiles.append([y * scale_factor, h * scale_factor, newrow])
+
+    newgrid = images.Grid(
+        newtiles,
+        tile_w=grid.tile_w * scale_factor,
+        tile_h=grid.tile_h * scale_factor,
+        image_w=grid.image_w * scale_factor,
+        image_h=grid.image_h * scale_factor,
+        overlap=grid.overlap * scale_factor,
+    )
+    return images.combine_grid(newgrid)
--
cgit v1.2.3

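For orientation, the new helper is meant to be driven roughly as below -- a minimal
sketch, not part of the patch; the checkpoint path, device, and tile settings are
illustrative, and `load_spandrel_model` is the loader the later commits in this
series pair it with. A `tile_size` of zero skips the grid and runs the whole image
through `upscale_without_tiling` in one pass.

    from PIL import Image

    from modules import modelloader
    from modules.upscaler_utils import upscale_with_model

    # Hypothetical checkpoint path and settings, for illustration only.
    model = modelloader.load_spandrel_model(
        "models/ESRGAN/4x_example.pth",
        device="cuda",
        expected_architecture="ESRGAN",
    )
    img = Image.open("input.png").convert("RGB")
    # tile_size > 0 splits the image into a grid, upscales each tile, and
    # stitches the results back together with images.combine_grid.
    result = upscale_with_model(model, img, tile_size=192, tile_overlap=8)
    result.save("output.png")
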
From 8100e901ab0c5b04d289eebb722c8a653b8beef1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 30 Dec 2023 22:41:53 +0300
Subject: fix error with RealESRGAN model failing to upscale fp32 image

---
 modules/upscaler_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index 8bdda51c..39f78a0b 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -16,9 +16,13 @@ def upscale_without_tiling(model, img: Image.Image):
     img = img[:, :, ::-1]
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
-    img = img.unsqueeze(0).to(devices.device_esrgan)
+
+    model_weight = next(iter(model.parameters()))
+    img = img.unsqueeze(0).to(device=model_weight.device, dtype=model_weight.dtype)
+
     with torch.no_grad():
         output = model(img)
+
     output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
     output = 255. * np.moveaxis(output, 0, 2)
     output = output.astype(np.uint8)
--
cgit v1.2.3

From 3be90740316f8fbb950b31d440458a5e8ed4beb3 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 31 Dec 2023 00:43:41 +0300
Subject: fix for the previous fix.

---
 modules/upscaler_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index 39f78a0b..dde5d7ad 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -17,7 +17,7 @@ def upscale_without_tiling(model, img: Image.Image):
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
 
-    model_weight = next(iter(model.parameters()))
+    model_weight = next(iter(model.model.parameters()))
     img = img.unsqueeze(0).to(device=model_weight.device, dtype=model_weight.dtype)
 
     with torch.no_grad():
--
cgit v1.2.3

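These two commits make the input tensor follow the model's own device and dtype
instead of a hard-coded `devices.device_esrgan`: PyTorch refuses to run an fp32
input through fp16 weights, so the input is cast to whatever the first weight
reports. The follow-up accounts for Spandrel's `ModelDescriptor`, a wrapper whose
underlying `torch.nn.Module` lives at `.model` and which exposes no `.parameters()`
of its own. A minimal sketch of the failure mode and the cast, using a plain conv
layer as a stand-in for an upscaler:

    import torch

    net = torch.nn.Conv2d(3, 3, 3).half()   # stand-in for an fp16 upscaler network
    x = torch.rand(1, 3, 8, 8)              # fp32 input, as produced from a numpy image

    # net(x) here would raise a RuntimeError about mismatched input/weight types.
    weight = next(iter(net.parameters()))
    x = x.to(device=weight.device, dtype=weight.dtype)  # cast input to match weights
    y = net(x)                               # dtypes now agree, so this succeeds
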
From 777af661a21821994993df3ef566b01df2bb61a0 Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Sun, 31 Dec 2023 00:09:51 +0200
Subject: Be more clear about Spandrel model nomenclature

---
 extensions-builtin/SwinIR/scripts/swinir_model.py |  6 +++---
 modules/gfpgan_model.py                           | 10 ++++++----
 modules/modelloader.py                            |  2 +-
 modules/realesrgan_model.py                       |  4 ++--
 modules/upscaler_utils.py                         |  2 +-
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py
index aae159af..95c7ec64 100644
--- a/extensions-builtin/SwinIR/scripts/swinir_model.py
+++ b/extensions-builtin/SwinIR/scripts/swinir_model.py
@@ -71,7 +71,7 @@ class UpscalerSwinIR(Upscaler):
         else:
             filename = path
 
-        model = modelloader.load_spandrel_model(
+        model_descriptor = modelloader.load_spandrel_model(
             filename,
             device=self._get_device(),
             dtype=devices.dtype,
@@ -79,10 +79,10 @@ class UpscalerSwinIR(Upscaler):
         )
         if getattr(opts, 'SWIN_torch_compile', False):
             try:
-                model = torch.compile(model)
+                model_descriptor.model.compile()
             except Exception:
                 logger.warning("Failed to compile SwinIR model, fallback to JIT", exc_info=True)
-        return model
+        return model_descriptor
 
     def _get_device(self):
         return devices.get_device_for('swinir')
diff --git a/modules/gfpgan_model.py b/modules/gfpgan_model.py
index 48f8ad5e..445b0409 100644
--- a/modules/gfpgan_model.py
+++ b/modules/gfpgan_model.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import os
 
+import torch
+
 from modules import (
     devices,
     errors,
@@ -25,7 +27,7 @@ class FaceRestorerGFPGAN(face_restoration_utils.CommonFaceRestoration):
     def get_device(self):
         return devices.device_gfpgan
 
-    def load_net(self) -> None:
+    def load_net(self) -> torch.Module:
         for model_path in modelloader.load_models(
             model_path=self.model_path,
             model_url=model_url,
@@ -34,13 +36,13 @@ class FaceRestorerGFPGAN(face_restoration_utils.CommonFaceRestoration):
             ext_filter=['.pth'],
         ):
             if 'GFPGAN' in os.path.basename(model_path):
-                net = modelloader.load_spandrel_model(
+                model = modelloader.load_spandrel_model(
                     model_path,
                     device=self.get_device(),
                     expected_architecture='GFPGAN',
                 ).model
-                net.different_w = True  # see https://github.com/chaiNNer-org/spandrel/pull/81
-                return net
+                model.different_w = True  # see https://github.com/chaiNNer-org/spandrel/pull/81
+                return model
         raise ValueError("No GFPGAN model found")
 
     def restore(self, np_image):
diff --git a/modules/modelloader.py b/modules/modelloader.py
index 8bcee08c..a7194137 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -143,7 +143,7 @@ def load_spandrel_model(
     *,
     device: str | torch.device | None,
     half: bool = False,
-    dtype: str | None = None,
+    dtype: str | torch.dtype | None = None,
     expected_architecture: str | None = None,
 ) -> spandrel.ModelDescriptor:
     import spandrel
diff --git a/modules/realesrgan_model.py b/modules/realesrgan_model.py
index 65f2e880..4d35b695 100644
--- a/modules/realesrgan_model.py
+++ b/modules/realesrgan_model.py
@@ -36,14 +36,14 @@ class UpscalerRealESRGAN(Upscaler):
             errors.report(f"Unable to load RealESRGAN model {path}", exc_info=True)
             return img
 
-        mod = modelloader.load_spandrel_model(
+        model_descriptor = modelloader.load_spandrel_model(
             info.local_data_path,
             device=self.device,
             half=(not cmd_opts.no_half and not cmd_opts.upcast_sampling),
             expected_architecture="ESRGAN",  # "RealESRGAN" isn't a specific thing for Spandrel
         )
         return upscale_with_model(
-            mod,
+            model_descriptor,
             img,
             tile_size=opts.ESRGAN_tile,
             tile_overlap=opts.ESRGAN_tile_overlap,
diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index dde5d7ad..174c9bc3 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -6,7 +6,7 @@ import torch
 import tqdm
 from PIL import Image
 
-from modules import devices, images
+from modules import images
 
 logger = logging.getLogger(__name__)
--
cgit v1.2.3

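The renames above hinge on one distinction: `load_spandrel_model` returns a
Spandrel `ModelDescriptor`, not a bare network, and the underlying
`torch.nn.Module` sits at `.model` -- which is why the SwinIR change compiles
`model_descriptor.model` rather than wrapping the descriptor in `torch.compile`.
One detail worth flagging: the new `load_net` annotation `torch.Module` is
presumably intended as `torch.nn.Module`, since `torch` has no top-level
`Module`. A sketch of the descriptor/module distinction, assuming Spandrel's
loader API and an illustrative checkpoint path:

    import spandrel

    descriptor = spandrel.ModelLoader().load_from_file("4x_example.pth")
    print(descriptor.architecture)   # e.g. an ESRGAN architecture
    print(type(descriptor.model))    # the underlying torch.nn.Module
    descriptor.model.compile()       # compile the module itself, not the wrapper
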
From 6f86b62a1be7993073ba3a789d522e0b8870605a Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Sat, 30 Dec 2023 22:53:49 +0200
Subject: Deduplicate tiled inference code from SwinIR/ScuNET

---
 extensions-builtin/ScuNET/scripts/scunet_model.py | 55 ++++------------
 extensions-builtin/SwinIR/scripts/swinir_model.py | 57 ++----------------
 modules/upscaler_utils.py                         | 72 ++++++++++++++++++++++-
 3 files changed, 87 insertions(+), 97 deletions(-)

diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py
index 5f3dd08b..f799cb76 100644
--- a/extensions-builtin/ScuNET/scripts/scunet_model.py
+++ b/extensions-builtin/ScuNET/scripts/scunet_model.py
@@ -3,12 +3,11 @@ import sys
 import PIL.Image
 import numpy as np
 import torch
-from tqdm import tqdm
 
 import modules.upscaler
 from modules import devices, modelloader, script_callbacks, errors
-
 from modules.shared import opts
+from modules.upscaler_utils import tiled_upscale_2
 
 
 class UpscalerScuNET(modules.upscaler.Upscaler):
@@ -40,47 +39,6 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
         scalers.append(scaler_data2)
         self.scalers = scalers
 
-    @staticmethod
-    @torch.no_grad()
-    def tiled_inference(img, model):
-        # test the image tile by tile
-        h, w = img.shape[2:]
-        tile = opts.SCUNET_tile
-        tile_overlap = opts.SCUNET_tile_overlap
-        if tile == 0:
-            return model(img)
-
-        device = devices.get_device_for('scunet')
-        assert tile % 8 == 0, "tile size should be a multiple of window_size"
-        sf = 1
-
-        stride = tile - tile_overlap
-        h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
-        w_idx_list = list(range(0, w - tile, stride)) + [w - tile]
-        E = torch.zeros(1, 3, h * sf, w * sf, dtype=img.dtype, device=device)
-        W = torch.zeros_like(E, dtype=devices.dtype, device=device)
-
-        with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="ScuNET tiles") as pbar:
-            for h_idx in h_idx_list:
-
-                for w_idx in w_idx_list:
-
-                    in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile]
-
-                    out_patch = model(in_patch)
-                    out_patch_mask = torch.ones_like(out_patch)
-
-                    E[
-                        ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
-                    ].add_(out_patch)
-                    W[
-                        ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
-                    ].add_(out_patch_mask)
-                    pbar.update(1)
-        output = E.div_(W)
-
-        return output
-
     def do_upscale(self, img: PIL.Image.Image, selected_file):
 
         devices.torch_gc()
@@ -104,7 +62,16 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
         _img[:, :, :h, :w] = torch_img # pad image
         torch_img = _img
 
-        torch_output = self.tiled_inference(torch_img, model).squeeze(0)
+        with torch.no_grad():
+            torch_output = tiled_upscale_2(
+                torch_img,
+                model,
+                tile_size=opts.SCUNET_tile,
+                tile_overlap=opts.SCUNET_tile_overlap,
+                scale=1,
+                device=devices.get_device_for('scunet'),
+                desc="ScuNET tiles",
+            ).squeeze(0)
         torch_output = torch_output[:, :h * 1, :w * 1] # remove padding, if any
         np_output: np.ndarray = torch_output.float().cpu().clamp_(0, 1).numpy()
         del torch_img, torch_output
diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py
index 95c7ec64..8a555c79 100644
--- a/extensions-builtin/SwinIR/scripts/swinir_model.py
+++ b/extensions-builtin/SwinIR/scripts/swinir_model.py
@@ -4,11 +4,11 @@ import sys
 import numpy as np
 import torch
 from PIL import Image
-from tqdm import tqdm
 
 from modules import modelloader, devices, script_callbacks, shared
-from modules.shared import opts, state
+from modules.shared import opts
 from modules.upscaler import Upscaler, UpscalerData
+from modules.upscaler_utils import tiled_upscale_2
 
 SWINIR_MODEL_URL = "https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth"
@@ -110,14 +110,14 @@ def upscale(
         w_pad = (w_old // window_size + 1) * window_size - w_old
         img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, : h_old + h_pad, :]
         img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, : w_old + w_pad]
-    output = inference(
+    output = tiled_upscale_2(
         img,
         model,
-        tile=tile,
+        tile_size=tile,
         tile_overlap=tile_overlap,
-        window_size=window_size,
         scale=scale,
         device=device,
+        desc="SwinIR tiles",
     )
     output = output[..., : h_old * scale, : w_old * scale]
     output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
@@ -129,53 +129,6 @@
     return Image.fromarray(output, "RGB")
 
 
-def inference(
-    img,
-    model,
-    *,
-    tile: int,
-    tile_overlap: int,
-    window_size: int,
-    scale: int,
-    device,
-):
-    # test the image tile by tile
-    b, c, h, w = img.size()
-    tile = min(tile, h, w)
-    assert tile % window_size == 0, "tile size should be a multiple of window_size"
-    sf = scale
-
-    stride = tile - tile_overlap
-    h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
-    w_idx_list = list(range(0, w - tile, stride)) + [w - tile]
-    E = torch.zeros(b, c, h * sf, w * sf, dtype=devices.dtype, device=device).type_as(img)
-    W = torch.zeros_like(E, dtype=devices.dtype, device=device)
-
-    with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="SwinIR tiles") as pbar:
-        for h_idx in h_idx_list:
-            if state.interrupted or state.skipped:
-                break
-
-            for w_idx in w_idx_list:
-                if state.interrupted or state.skipped:
-                    break
-
-                in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile]
-                out_patch = model(in_patch)
-                out_patch_mask = torch.ones_like(out_patch)
-
-                E[
-                    ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
-                ].add_(out_patch)
-                W[
-                    ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
-                ].add_(out_patch_mask)
-                pbar.update(1)
-    output = E.div_(W)
-
-    return output
-
-
 def on_ui_settings():
     import gradio as gr
diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index 174c9bc3..8e413854 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -6,7 +6,7 @@ import torch
 import tqdm
 from PIL import Image
 
-from modules import images
+from modules import images, shared
 
 logger = logging.getLogger(__name__)
@@ -68,3 +68,73 @@ def upscale_with_model(
         overlap=grid.overlap * scale_factor,
     )
     return images.combine_grid(newgrid)
+
+
+def tiled_upscale_2(
+    img,
+    model,
+    *,
+    tile_size: int,
+    tile_overlap: int,
+    scale: int,
+    device,
+    desc="Tiled upscale",
+):
+    # Alternative implementation of `upscale_with_model` originally used by
+    # SwinIR and ScuNET.  It differs from `upscale_with_model` in that tiling and
+    # weighting is done in PyTorch space, as opposed to `images.Grid` doing it in
+    # Pillow space without weighting.
+    b, c, h, w = img.size()
+    tile_size = min(tile_size, h, w)
+
+    if tile_size <= 0:
+        logger.debug("Upscaling %s without tiling", img.shape)
+        return model(img)
+
+    stride = tile_size - tile_overlap
+    h_idx_list = list(range(0, h - tile_size, stride)) + [h - tile_size]
+    w_idx_list = list(range(0, w - tile_size, stride)) + [w - tile_size]
+    result = torch.zeros(
+        b,
+        c,
+        h * scale,
+        w * scale,
+        device=device,
+    ).type_as(img)
+    weights = torch.zeros_like(result)
+    logger.debug("Upscaling %s to %s with tiles", img.shape, result.shape)
+    with tqdm.tqdm(total=len(h_idx_list) * len(w_idx_list), desc=desc) as pbar:
+        for h_idx in h_idx_list:
+            if shared.state.interrupted or shared.state.skipped:
+                break
+
+            for w_idx in w_idx_list:
+                if shared.state.interrupted or shared.state.skipped:
+                    break
+
+                in_patch = img[
+                    ...,
+                    h_idx : h_idx + tile_size,
+                    w_idx : w_idx + tile_size,
+                ]
+                out_patch = model(in_patch)
+
+                result[
+                    ...,
+                    h_idx * scale : (h_idx + tile_size) * scale,
+                    w_idx * scale : (w_idx + tile_size) * scale,
+                ].add_(out_patch)
+
+                out_patch_mask = torch.ones_like(out_patch)
+
+                weights[
+                    ...,
+                    h_idx * scale : (h_idx + tile_size) * scale,
+                    w_idx * scale : (w_idx + tile_size) * scale,
+                ].add_(out_patch_mask)
+
+                pbar.update(1)
+
+    output = result.div_(weights)
+
+    return output
--
cgit v1.2.3

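The consolidated `tiled_upscale_2` accumulates every tile's prediction into
`result` and a count of contributions into `weights`, then divides, so pixels in
overlap regions get the mean of all tiles covering them rather than a hard seam.
A sketch of that property, runnable only inside the webui environment (the
function touches `shared.state`), using an identity function as a stand-in model
with `scale=1`:

    import torch

    from modules.upscaler_utils import tiled_upscale_2

    img = torch.rand(1, 3, 64, 64)
    out = tiled_upscale_2(
        img,
        lambda t: t,        # identity stand-in for an upscaler network
        tile_size=32,
        tile_overlap=8,     # overlapping regions are averaged across tiles
        scale=1,
        device=img.device,
    )
    # Averaging identical predictions is a no-op, so the input round-trips.
    assert torch.allclose(out, img)
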
From 5768afc776a66bb94e77a9c1daebeea58fa731d5 Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Sun, 31 Dec 2023 00:20:30 +0200
Subject: Add utility to inspect a model's parameters (to get dtype/device)

---
 modules/devices.py        |  3 ++-
 modules/interrogate.py    |  3 ++-
 modules/sd_models_xl.py   |  3 ++-
 modules/torch_utils.py    | 17 +++++++++++++++++
 modules/upscaler_utils.py |  5 +++--
 modules/xlmr.py           |  5 ++++-
 modules/xlmr_m18.py       |  5 ++++-
 test/test_torch_utils.py  | 19 +++++++++++++++++++
 8 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 modules/torch_utils.py
 create mode 100644 test/test_torch_utils.py

diff --git a/modules/devices.py b/modules/devices.py
index c956207f..bd6bd579 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -4,6 +4,7 @@ from functools import lru_cache
 import torch
 
 from modules import errors, shared
+from modules.torch_utils import get_param
 
 if sys.platform == "darwin":
     from modules import mac_specific
@@ -131,7 +132,7 @@ patch_module_list = [
 
 
 def manual_cast_forward(self, *args, **kwargs):
-    org_dtype = next(self.parameters()).dtype
+    org_dtype = get_param(self).dtype
     self.to(dtype)
     args = [arg.to(dtype) if isinstance(arg, torch.Tensor) else arg for arg in args]
     kwargs = {k: v.to(dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 3045560d..5be5a10f 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -11,6 +11,7 @@ from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
 
 from modules import devices, paths, shared, lowvram, modelloader, errors
+from modules.torch_utils import get_param
 
 blip_image_eval_size = 384
 clip_model_name = 'ViT-L/14'
@@ -131,7 +132,7 @@ class InterrogateModels:
 
         self.clip_model = self.clip_model.to(devices.device_interrogate)
 
-        self.dtype = next(self.clip_model.parameters()).dtype
+        self.dtype = get_param(self.clip_model).dtype
 
     def send_clip_to_ram(self):
         if not shared.opts.interrogate_keep_models_in_memory:
diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py
index 1de31b0d..c3602a7e 100644
--- a/modules/sd_models_xl.py
+++ b/modules/sd_models_xl.py
@@ -6,6 +6,7 @@ import sgm.models.diffusion
 import sgm.modules.diffusionmodules.denoiser_scaling
 import sgm.modules.diffusionmodules.discretizer
 from modules import devices, shared, prompt_parser
+from modules.torch_utils import get_param
 
 
 def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: prompt_parser.SdConditioning | list[str]):
@@ -90,7 +91,7 @@ sgm.modules.GeneralConditioner.get_target_prompt_token_count = get_target_prompt_token_count
 
 def extend_sdxl(model):
     """this adds a bunch of parameters to make SDXL model look a bit more like SD1.5 to the rest of the codebase."""
-    dtype = next(model.model.diffusion_model.parameters()).dtype
+    dtype = get_param(model.model.diffusion_model).dtype
     model.model.diffusion_model.dtype = dtype
     model.model.conditioning_key = 'crossattn'
     model.cond_stage_key = 'txt'
diff --git a/modules/torch_utils.py b/modules/torch_utils.py
new file mode 100644
index 00000000..e5b52393
--- /dev/null
+++ b/modules/torch_utils.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import torch.nn
+
+
+def get_param(model) -> torch.nn.Parameter:
+    """
+    Find the first parameter in a model or module.
+    """
+    if hasattr(model, "model") and hasattr(model.model, "parameters"):
+        # Unpeel a model descriptor to get at the actual Torch module.
+        model = model.model
+
+    for param in model.parameters():
+        return param
+
+    raise ValueError(f"No parameters found in model {model!r}")
diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index 8e413854..c60e3beb 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -7,6 +7,7 @@ import tqdm
 from PIL import Image
 
 from modules import images, shared
+from modules.torch_utils import get_param
 
 logger = logging.getLogger(__name__)
@@ -17,8 +18,8 @@ def upscale_without_tiling(model, img: Image.Image):
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
 
-    model_weight = next(iter(model.model.parameters()))
-    img = img.unsqueeze(0).to(device=model_weight.device, dtype=model_weight.dtype)
+    param = get_param(model)
+    img = img.unsqueeze(0).to(device=param.device, dtype=param.dtype)
 
     with torch.no_grad():
diff --git a/modules/xlmr.py b/modules/xlmr.py
index a407a3ca..6e000a56 100644
--- a/modules/xlmr.py
+++ b/modules/xlmr.py
@@ -5,6 +5,9 @@ from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig
 from transformers import XLMRobertaModel,XLMRobertaTokenizer
 from typing import Optional
 
+from modules.torch_utils import get_param
+
+
 class BertSeriesConfig(BertConfig):
     def __init__(self, vocab_size=30522, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, layer_norm_eps=1e-12, pad_token_id=0, position_embedding_type="absolute", use_cache=True, classifier_dropout=None,project_dim=512, pooler_fn="average",learn_encoder=False,model_type='bert',**kwargs):
@@ -62,7 +65,7 @@ class BertSeriesModelWithTransformation(BertPreTrainedModel):
         self.post_init()
 
     def encode(self,c):
-        device = next(self.parameters()).device
+        device = get_param(self).device
         text = self.tokenizer(c,
             truncation=True,
             max_length=77,
diff --git a/modules/xlmr_m18.py b/modules/xlmr_m18.py
index a727e865..e3e81961 100644
--- a/modules/xlmr_m18.py
+++ b/modules/xlmr_m18.py
@@ -5,6 +5,9 @@ from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig
 from transformers import XLMRobertaModel,XLMRobertaTokenizer
 from typing import Optional
 
+from modules.torch_utils import get_param
+
+
 class BertSeriesConfig(BertConfig):
     def __init__(self, vocab_size=30522, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, layer_norm_eps=1e-12, pad_token_id=0, position_embedding_type="absolute", use_cache=True, classifier_dropout=None,project_dim=512, pooler_fn="average",learn_encoder=False,model_type='bert',**kwargs):
@@ -68,7 +71,7 @@ class BertSeriesModelWithTransformation(BertPreTrainedModel):
         self.post_init()
 
     def encode(self,c):
-        device = next(self.parameters()).device
+        device = get_param(self).device
         text = self.tokenizer(c,
             truncation=True,
             max_length=77,
diff --git a/test/test_torch_utils.py b/test/test_torch_utils.py
new file mode 100644
index 00000000..f1aec832
--- /dev/null
+++ b/test/test_torch_utils.py
@@ -0,0 +1,19 @@
+import types
+
+import pytest
+import torch
+
+from modules.torch_utils import get_param
+
+
+@pytest.mark.parametrize("wrapped", [True, False])
+def test_get_param(wrapped):
+    mod = torch.nn.Linear(1, 1)
+    cpu = torch.device("cpu")
+    mod.to(dtype=torch.float16, device=cpu)
+    if wrapped:
+        # more or less how spandrel wraps a thing
+        mod = types.SimpleNamespace(model=mod)
+    p = get_param(mod)
+    assert p.dtype == torch.float16
+    assert p.device == cpu
--
cgit v1.2.3

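`get_param` exists so call sites stop writing `next(model.parameters())` with
ad-hoc unwrapping: it peels a Spandrel-style wrapper (anything with a `.model`
that has `.parameters()`) and returns the first parameter, from which device and
dtype are read. Note the implicit assumption: the first parameter is taken as
representative, so a model deliberately split across devices or dtypes would need
different handling. The `for ... return` idiom also makes the empty-parameters
case fall through to the `ValueError`. A small sketch of both call shapes,
mirroring the new test:

    import types

    import torch

    from modules.torch_utils import get_param

    net = torch.nn.Linear(4, 4)
    wrapped = types.SimpleNamespace(model=net)   # how a descriptor wraps a module

    assert get_param(net) is get_param(wrapped)  # both resolve to the same weight
    p = get_param(net)
    x = torch.rand(2, 4).to(device=p.device, dtype=p.dtype)  # match input to weights
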
From a70dfb64a86b9b6d869deffdb0ffebe980365473 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sun, 31 Dec 2023 22:38:30 +0300
Subject: change import statements for #14478

---
 modules/devices.py        | 4 ++--
 modules/interrogate.py    | 5 ++---
 modules/sd_models_xl.py   | 4 ++--
 modules/upscaler_utils.py | 5 ++---
 modules/xlmr.py           | 4 ++--
 modules/xlmr_m18.py       | 5 ++---
 test/test_torch_utils.py  | 4 ++--
 7 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/modules/devices.py b/modules/devices.py
index bd6bd579..ff279ac5 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -4,7 +4,7 @@ from functools import lru_cache
 import torch
 
 from modules import errors, shared
-from modules.torch_utils import get_param
+from modules import torch_utils
 
 if sys.platform == "darwin":
     from modules import mac_specific
@@ -132,7 +132,7 @@ patch_module_list = [
 
 
 def manual_cast_forward(self, *args, **kwargs):
-    org_dtype = get_param(self).dtype
+    org_dtype = torch_utils.get_param(self).dtype
     self.to(dtype)
     args = [arg.to(dtype) if isinstance(arg, torch.Tensor) else arg for arg in args]
     kwargs = {k: v.to(dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 5be5a10f..35a627ca 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -10,8 +10,7 @@ import torch.hub
 from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
 
-from modules import devices, paths, shared, lowvram, modelloader, errors
-from modules.torch_utils import get_param
+from modules import devices, paths, shared, lowvram, modelloader, errors, torch_utils
 
 blip_image_eval_size = 384
 clip_model_name = 'ViT-L/14'
@@ -132,7 +131,7 @@ class InterrogateModels:
 
         self.clip_model = self.clip_model.to(devices.device_interrogate)
 
-        self.dtype = get_param(self.clip_model).dtype
+        self.dtype = torch_utils.get_param(self.clip_model).dtype
 
     def send_clip_to_ram(self):
         if not shared.opts.interrogate_keep_models_in_memory:
diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py
index c3602a7e..0de17af3 100644
--- a/modules/sd_models_xl.py
+++ b/modules/sd_models_xl.py
@@ -6,7 +6,7 @@ import sgm.models.diffusion
 import sgm.modules.diffusionmodules.denoiser_scaling
 import sgm.modules.diffusionmodules.discretizer
 from modules import devices, shared, prompt_parser
-from modules.torch_utils import get_param
+from modules import torch_utils
 
 
 def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: prompt_parser.SdConditioning | list[str]):
@@ -91,7 +91,7 @@ sgm.modules.GeneralConditioner.get_target_prompt_token_count = get_target_prompt_token_count
 
 def extend_sdxl(model):
     """this adds a bunch of parameters to make SDXL model look a bit more like SD1.5 to the rest of the codebase."""
-    dtype = get_param(model.model.diffusion_model).dtype
+    dtype = torch_utils.get_param(model.model.diffusion_model).dtype
     model.model.diffusion_model.dtype = dtype
     model.model.conditioning_key = 'crossattn'
     model.cond_stage_key = 'txt'
diff --git a/modules/upscaler_utils.py b/modules/upscaler_utils.py
index c60e3beb..f5cb92d5 100644
--- a/modules/upscaler_utils.py
+++ b/modules/upscaler_utils.py
@@ -6,8 +6,7 @@ import torch
 import tqdm
 from PIL import Image
 
-from modules import images, shared
-from modules.torch_utils import get_param
+from modules import images, shared, torch_utils
 
 logger = logging.getLogger(__name__)
@@ -18,7 +17,7 @@ def upscale_without_tiling(model, img: Image.Image):
     img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
     img = torch.from_numpy(img).float()
 
-    param = get_param(model)
+    param = torch_utils.get_param(model)
     img = img.unsqueeze(0).to(device=param.device, dtype=param.dtype)
 
     with torch.no_grad():
diff --git a/modules/xlmr.py b/modules/xlmr.py
index 6e000a56..319771b7 100644
--- a/modules/xlmr.py
+++ b/modules/xlmr.py
@@ -5,7 +5,7 @@ from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig
 from transformers import XLMRobertaModel,XLMRobertaTokenizer
 from typing import Optional
 
-from modules.torch_utils import get_param
+from modules import torch_utils
 
 
 class BertSeriesConfig(BertConfig):
@@ -65,7 +65,7 @@ class BertSeriesModelWithTransformation(BertPreTrainedModel):
         self.post_init()
 
     def encode(self,c):
-        device = get_param(self).device
+        device = torch_utils.get_param(self).device
         text = self.tokenizer(c,
             truncation=True,
             max_length=77,
diff --git a/modules/xlmr_m18.py b/modules/xlmr_m18.py
index e3e81961..f6055504 100644
--- a/modules/xlmr_m18.py
+++ b/modules/xlmr_m18.py
@@ -4,8 +4,7 @@ import torch
 from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig
 from transformers import XLMRobertaModel,XLMRobertaTokenizer
 from typing import Optional
-
-from modules.torch_utils import get_param
+from modules import torch_utils
 
 
 class BertSeriesConfig(BertConfig):
@@ -71,7 +70,7 @@ class BertSeriesModelWithTransformation(BertPreTrainedModel):
         self.post_init()
 
     def encode(self,c):
-        device = get_param(self).device
+        device = torch_utils.get_param(self).device
         text = self.tokenizer(c,
             truncation=True,
             max_length=77,
diff --git a/test/test_torch_utils.py b/test/test_torch_utils.py
index f1aec832..23ccb93a 100644
--- a/test/test_torch_utils.py
+++ b/test/test_torch_utils.py
@@ -3,7 +3,7 @@ import types
 import pytest
 import torch
 
-from modules.torch_utils import get_param
+from modules import torch_utils
 
 
 @pytest.mark.parametrize("wrapped", [True, False])
@@ -14,6 +14,6 @@ def test_get_param(wrapped):
     if wrapped:
         # more or less how spandrel wraps a thing
         mod = types.SimpleNamespace(model=mod)
-    p = get_param(mod)
+    p = torch_utils.get_param(mod)
     assert p.dtype == torch.float16
    assert p.device == cpu
--
cgit v1.2.3

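The final commit standardizes on importing the module namespace rather than
individual names, so call sites read `torch_utils.get_param(...)`; the stated
rationale lives in PR #14478 and is not reproduced here. The convention, in
brief:

    import torch

    from modules import torch_utils               # import the namespace...

    model = torch.nn.Linear(1, 1)                 # any module or Spandrel-style wrapper
    dtype = torch_utils.get_param(model).dtype    # ...and qualify call sites

    # rather than: from modules.torch_utils import get_param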