From 948eff4b3caa237334389a5a08adda130e2b43a5 Mon Sep 17 00:00:00 2001
From: C43H66N12O12S2 <36072735+C43H66N12O12S2@users.noreply.github.com>
Date: Tue, 20 Sep 2022 16:36:20 +0300
Subject: make swinir actually useful

---
 modules/swinir.py | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 swinir.py         | 74 --------------------------------------------
 2 files changed, 92 insertions(+), 74 deletions(-)
 create mode 100644 modules/swinir.py
 delete mode 100644 swinir.py

diff --git a/modules/swinir.py b/modules/swinir.py
new file mode 100644
index 00000000..6c7f0a2d
--- /dev/null
+++ b/modules/swinir.py
@@ -0,0 +1,92 @@
+import contextlib
+import os
+import sys
+import traceback
+from collections import OrderedDict, namedtuple
+import cv2
+import numpy as np
+import requests
+import torch
+from PIL import Image
+import modules.images
+from modules.shared import cmd_opts, opts, device
+from modules.swinir_arch import SwinIR as net
+precision_scope = torch.autocast if cmd_opts.precision == "autocast" else contextlib.nullcontext  # used as precision_scope("cuda")
+def load_model(task = "realsr", large_model = True, model_path="C:/sd/ESRGANn/4x-large.pth", scale=4):
+    """Build a SwinIR net, load its weights from ``model_path`` and return it in fp16 eval mode on ``device``.
+
+    Side effect: makes sure an ``UpscalerSwin("McSwinnySwin")`` is listed in ``modules.shared.sd_upscalers``.
+    ``task`` is accepted but not used. ``large_model`` selects the 9-stage, 240-wide variant with '3conv'.
+    """
+    try:
+        # Register only once: upscale() calls load_model() for every image it processes.
+        if not any(isinstance(u, UpscalerSwin) for u in modules.shared.sd_upscalers):
+            modules.shared.sd_upscalers.append(UpscalerSwin("McSwinnySwin"))
+    except Exception:
+        print("Error registering SwinIR upscaler", file=sys.stderr)
+        print(traceback.format_exc(), file=sys.stderr)
+    # Both sizes upsample with 'nearest+conv' to avoid block artifacts.
+    stages, width, heads, resi = (9, 240, 8, '3conv') if large_model else (6, 180, 6, '1conv')
+    model = net(upscale=scale, in_chans=3, img_size=64, window_size=8, img_range=1.,
+                depths=[6] * stages, embed_dim=width, num_heads=[heads] * stages,
+                mlp_ratio=2, upsampler='nearest+conv', resi_connection=resi)
+    # map_location="cpu" lets checkpoints saved on another device open; the weights move to ``device`` below.
+    pretrained_model = torch.load(model_path, map_location="cpu")
+    model.load_state_dict(pretrained_model.get("params_ema", pretrained_model), strict=True)  # bare state dicts work too
+    model.eval()  # a new nn.Module starts in training mode; turn off any train-only layers the arch may contain
+    return model.half().to(device)
+    
+def upscale(img, tile=opts.ESRGAN_tile, tile_overlap=opts.ESRGAN_tile_overlap, window_size = 8, scale = 4):
+    """Upscale ``img`` (anything ``np.array`` turns into H x W x 3, values 0-255, treated as RGB) with SwinIR.
+
+    The image is extended with its own mirror image up to the next multiple of ``window_size``, run through
+    ``inference`` tile by tile, cropped to ``scale`` times the input size and returned as a PIL 'RGB' image.
+    """
+    img = np.moveaxis(np.array(img)[:, :, ::-1], 2, 0) / 255  # reverse channel order, HWC -> CHW, scale to 0-1
+    img = torch.from_numpy(img).float().unsqueeze(0).to(device)
+    model = load_model(scale=scale)  # build the net for the same factor the tiling and crop below use
+    with torch.no_grad(), precision_scope("cuda"):
+        _, _, h_old, w_old = img.size()
+        h_pad = (h_old // window_size + 1) * window_size - h_old
+        w_pad = (w_old // window_size + 1) * window_size - w_old
+        img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, :h_old + h_pad, :]
+        img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, :w_old + w_pad]
+        output = inference(img, model, tile, tile_overlap, window_size, scale)[..., :h_old * scale, :w_old * scale]
+        output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+        if output.ndim == 3:
+            output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))  # undo the channel reversal, CHW -> HWC
+        return Image.fromarray((output * 255.0).round().astype(np.uint8), 'RGB')  # float32 -> uint8
+    
+    
+def inference(img, model, tile, tile_overlap, window_size, scale):
+    """Run ``model`` over ``img`` (a b x c x h x w tensor) in overlapping square tiles and blend them.
+
+    ``tile`` is clamped to the image size and must be a multiple of ``window_size``. Each tile's
+    output (``scale`` times larger) is summed into place; overlaps are averaged by dividing by
+    the per-pixel tile count. Returns a b x c x h*scale x w*scale tensor on ``img``'s device.
+    """
+    b, c, h, w = img.size()
+    tile = min(tile, h, w)
+    assert tile % window_size == 0, "tile size should be a multiple of window_size"
+    sf = scale
+    # A clamped tile can be no larger than the overlap; stepping by a whole tile then avoids a zero or
+    # negative stride (range() error, or uncovered pixels that would divide 0 by 0).
+    stride = tile - tile_overlap if tile_overlap < tile else tile
+    h_idx_list = list(range(0, h-tile, stride)) + [h-tile]
+    w_idx_list = list(range(0, w-tile, stride)) + [w-tile]
+    E = torch.zeros(b, c, h*sf, w*sf, dtype=img.dtype, device=img.device)  # summed tile outputs
+    W = torch.zeros_like(E, dtype=torch.half)  # number of tiles that touched each output pixel
+    for h_idx in h_idx_list:
+        for w_idx in w_idx_list:
+            out_patch = model(img[..., h_idx:h_idx+tile, w_idx:w_idx+tile])
+            E[..., h_idx*sf:(h_idx+tile)*sf, w_idx*sf:(w_idx+tile)*sf].add_(out_patch)
+            W[..., h_idx*sf:(h_idx+tile)*sf, w_idx*sf:(w_idx+tile)*sf].add_(1)
+    return E.div_(W)
+    
+class UpscalerSwin(modules.images.Upscaler):
+    """Upscaler entry named ``title`` whose ``do_upscale`` hands the image to ``upscale``."""
+    def __init__(self, title):
+        self.name = title
+
+    def do_upscale(self, img):
+        return upscale(img)  # every other upscale() argument keeps its default
diff --git a/swinir.py b/swinir.py
deleted file mode 100644
index cb2bbe3d..00000000
--- a/swinir.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import sys
-import traceback
-import cv2
-from collections import OrderedDict
-import os
-import requests
-from collections import namedtuple
-import numpy as np
-from PIL import Image
-import torch
-import modules.images
-from modules.shared import cmd_opts, opts, device
-from modules.swinir_arch import SwinIR as net
-precision_scope = torch.autocast if cmd_opts.precision == "autocast" else contextlib.nullcontext
-def load_model(task = "realsr", large_model = True, model_path=next(os.listdir(cmd_opts.esrgan_models_path))):
-    if not large_model:
-    # use 'nearest+conv' to avoid block artifacts
-        model = net(upscale=scale, in_chans=3, img_size=64, window_size=8,
-                    img_range=1., depths=[6, 6, 6, 6, 6, 6], embed_dim=180, num_heads=[6, 6, 6, 6, 6, 6],
-                    mlp_ratio=2, upsampler='nearest+conv', resi_connection='1conv')
-    else:
-        # larger model size; use '3conv' to save parameters and memory; use ema for GAN training
-        model = net(upscale=scale, in_chans=3, img_size=64, window_size=8,
-                    img_range=1., depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], embed_dim=240,
-                    num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8],
-                    mlp_ratio=2, upsampler='nearest+conv', resi_connection='3conv')
-    
-    pretrained_model = torch.load(model_path)
-    model.load_state_dict(pretrained_model, strict=True)
-
-    return model.half().to(device)
-    
-def upscale(img, tile=opts.ESRGAN_tile, tile_overlap=opts.ESRGAN_tile_overlap, window_size = 8, scale = 4):
-    img = cv2.imread(img, cv2.IMREAD_COLOR).astype(np.float16) / 255.
-    model = load_model()
-    with torch.no_grad(), precision_scope("cuda"):
-        _, _, h_old, w_old = img.size()
-        h_pad = (h_old // window_size + 1) * window_size - h_old
-        w_pad = (w_old // window_size + 1) * window_size - w_old
-        img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, :h_old + h_pad, :]
-        img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, :w_old + w_pad]
-        output = inference(img, model, tile, tile_overlap, window_size, scale)
-        output = output[..., :h_old * scale, :w_old * scale]
-        output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
-        if output.ndim == 3:
-            output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))  # CHW-RGB to HCW-BGR
-        output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
-        return output
-    
-    
-def inference(img, model, tile, tile_overlap, window_size, scale):
-    # test the image tile by tile
-    b, c, h, w = img.size()
-    tile = min(tile, h, w)
-    assert tile % window_size == 0, "tile size should be a multiple of window_size"
-    sf = scale
-
-    stride = tile - tile_overlap
-    h_idx_list = list(range(0, h-tile, stride)) + [h-tile]
-    w_idx_list = list(range(0, w-tile, stride)) + [w-tile]
-    E = torch.zeros(b, c, h*sf, w*sf, dtype=torch.half, device=device).type_as(img)
-    W = torch.zeros_like(E, dtype=torch.half, device=device)
-
-    for h_idx in h_idx_list:
-        for w_idx in w_idx_list:
-            in_patch = img[..., h_idx:h_idx+tile, w_idx:w_idx+tile]
-            out_patch = model(in_patch)
-            out_patch_mask = torch.ones_like(out_patch)
-
-            E[..., h_idx*sf:(h_idx+tile)*sf, w_idx*sf:(w_idx+tile)*sf].add_(out_patch)
-            W[..., h_idx*sf:(h_idx+tile)*sf, w_idx*sf:(w_idx+tile)*sf].add_(out_patch_mask)
-    output = E.div_(W)
-
-    return output
\ No newline at end of file
-- 
cgit v1.2.3