From 47033afa5c08e72b622348b0bcfd71fd1a66e2cb Mon Sep 17 00:00:00 2001
From: AngelBottomless <aria1th@naver.com>
Date: Tue, 5 Sep 2023 22:38:02 +0900
Subject: Fix preview for textual inversion training

---
 modules/textual_inversion/textual_inversion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/textual_inversion')

diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index aa79dc09..401a0a2a 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -386,7 +386,7 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat
         assert log_directory, "Log directory is empty"
 
 
-def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, use_weight, create_image_every, save_embedding_every, template_filename, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, use_weight, create_image_every, save_embedding_every, template_filename, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_name, preview_cfg_scale, preview_seed, preview_width, preview_height):
     from modules import processing
 
     save_embedding_every = save_embedding_every or 0
@@ -590,7 +590,7 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st
                         p.prompt = preview_prompt
                         p.negative_prompt = preview_negative_prompt
                         p.steps = preview_steps
-                        p.sampler_name = sd_samplers.samplers[preview_sampler_index].name
+                        p.sampler_name = sd_samplers.samplers_map[preview_sampler_name.lower()]
                         p.cfg_scale = preview_cfg_scale
                         p.seed = preview_seed
                         p.width = preview_width
-- 
cgit v1.2.3


From a8cbe50c9fa324ed887089e4333452ecc4355c92 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 14 Oct 2023 12:14:56 +0300
Subject: remove duplicated code

---
 extensions-builtin/Lora/networks.py            | 31 +----------
 modules/textual_inversion/textual_inversion.py | 74 ++++++++++++++------------
 2 files changed, 42 insertions(+), 63 deletions(-)

(limited to 'modules/textual_inversion')

diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index 12f70576..d5f0f9f1 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -15,7 +15,7 @@ import torch
 from typing import Union
 
 from modules import shared, devices, sd_models, errors, scripts, sd_hijack
-from modules.textual_inversion.textual_inversion import Embedding
+import modules.textual_inversion.textual_inversion as textual_inversion
 
 from lora_logger import logger
 
@@ -210,34 +210,7 @@ def load_network(name, network_on_disk):
 
     embeddings = {}
     for emb_name, data in bundle_embeddings.items():
-        # textual inversion embeddings
-        if 'string_to_param' in data:
-            param_dict = data['string_to_param']
-            param_dict = getattr(param_dict, '_parameters', param_dict)  # fix for torch 1.12.1 loading saved file from torch 1.11
-            assert len(param_dict) == 1, 'embedding file has multiple terms in it'
-            emb = next(iter(param_dict.items()))[1]
-            vec = emb.detach().to(devices.device, dtype=torch.float32)
-            shape = vec.shape[-1]
-            vectors = vec.shape[0]
-        elif type(data) == dict and 'clip_g' in data and 'clip_l' in data:  # SDXL embedding
-            vec = {k: v.detach().to(devices.device, dtype=torch.float32) for k, v in data.items()}
-            shape = data['clip_g'].shape[-1] + data['clip_l'].shape[-1]
-            vectors = data['clip_g'].shape[0]
-        elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: # diffuser concepts
-            assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
-
-            emb = next(iter(data.values()))
-            if len(emb.shape) == 1:
-                emb = emb.unsqueeze(0)
-            vec = emb.detach().to(devices.device, dtype=torch.float32)
-            shape = vec.shape[-1]
-            vectors = vec.shape[0]
-        else:
-            raise Exception(f"Couldn't identify {emb_name} in lora: {name} as neither textual inversion embedding nor diffuser concept.")
-
-        embedding = Embedding(vec, emb_name)
-        embedding.vectors = vectors
-        embedding.shape = shape
+        embedding = textual_inversion.create_embedding_from_data(data, emb_name, filename=network_on_disk.filename + "/" + emb_name)
         embedding.loaded = None
         embeddings[emb_name] = embedding
 
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 401a0a2a..04dda585 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -181,40 +181,7 @@ class EmbeddingDatabase:
         else:
             return
 
-
-        # textual inversion embeddings
-        if 'string_to_param' in data:
-            param_dict = data['string_to_param']
-            param_dict = getattr(param_dict, '_parameters', param_dict)  # fix for torch 1.12.1 loading saved file from torch 1.11
-            assert len(param_dict) == 1, 'embedding file has multiple terms in it'
-            emb = next(iter(param_dict.items()))[1]
-            vec = emb.detach().to(devices.device, dtype=torch.float32)
-            shape = vec.shape[-1]
-            vectors = vec.shape[0]
-        elif type(data) == dict and 'clip_g' in data and 'clip_l' in data:  # SDXL embedding
-            vec = {k: v.detach().to(devices.device, dtype=torch.float32) for k, v in data.items()}
-            shape = data['clip_g'].shape[-1] + data['clip_l'].shape[-1]
-            vectors = data['clip_g'].shape[0]
-        elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: # diffuser concepts
-            assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
-
-            emb = next(iter(data.values()))
-            if len(emb.shape) == 1:
-                emb = emb.unsqueeze(0)
-            vec = emb.detach().to(devices.device, dtype=torch.float32)
-            shape = vec.shape[-1]
-            vectors = vec.shape[0]
-        else:
-            raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.")
-
-        embedding = Embedding(vec, name)
-        embedding.step = data.get('step', None)
-        embedding.sd_checkpoint = data.get('sd_checkpoint', None)
-        embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None)
-        embedding.vectors = vectors
-        embedding.shape = shape
-        embedding.filename = path
-        embedding.set_hash(hashes.sha256(embedding.filename, "textual_inversion/" + name) or '')
+        embedding = create_embedding_from_data(data, name, filename=filename, filepath=path)
 
         if self.expected_shape == -1 or self.expected_shape == embedding.shape:
             self.register_embedding(embedding, shared.sd_model)
@@ -313,6 +280,45 @@ def create_embedding(name, num_vectors_per_token, overwrite_old, init_text='*'):
     return fn
 
 
+def create_embedding_from_data(data, name, filename='unknown embedding file', filepath=None):
+    if 'string_to_param' in data:  # textual inversion embeddings
+        param_dict = data['string_to_param']
+        param_dict = getattr(param_dict, '_parameters', param_dict)  # fix for torch 1.12.1 loading saved file from torch 1.11
+        assert len(param_dict) == 1, 'embedding file has multiple terms in it'
+        emb = next(iter(param_dict.items()))[1]
+        vec = emb.detach().to(devices.device, dtype=torch.float32)
+        shape = vec.shape[-1]
+        vectors = vec.shape[0]
+    elif type(data) == dict and 'clip_g' in data and 'clip_l' in data:  # SDXL embedding
+        vec = {k: v.detach().to(devices.device, dtype=torch.float32) for k, v in data.items()}
+        shape = data['clip_g'].shape[-1] + data['clip_l'].shape[-1]
+        vectors = data['clip_g'].shape[0]
+    elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:  # diffuser concepts
+        assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
+
+        emb = next(iter(data.values()))
+        if len(emb.shape) == 1:
+            emb = emb.unsqueeze(0)
+        vec = emb.detach().to(devices.device, dtype=torch.float32)
+        shape = vec.shape[-1]
+        vectors = vec.shape[0]
+    else:
+        raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.")
+
+    embedding = Embedding(vec, name)
+    embedding.step = data.get('step', None)
+    embedding.sd_checkpoint = data.get('sd_checkpoint', None)
+    embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None)
+    embedding.vectors = vectors
+    embedding.shape = shape
+
+    if filepath:
+        embedding.filename = filepath
+        embedding.set_hash(hashes.sha256(filepath, "textual_inversion/" + name) or '')
+
+    return embedding
+
+
 def write_loss(log_directory, filename, step, epoch_len, values):
     if shared.opts.training_write_csv_every == 0:
         return
-- 
cgit v1.2.3


From 03ee297aa22296ea12b965fc1cb11aa46375d372 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Mon, 27 Nov 2023 17:26:16 +0900
Subject: fix Auto focal point crop for opencv >= 4.8.x

autocrop.download_and_cache_models
in opencv >= 4.8 the face detection model was updated
download the model based on the opencv version
returns the model path or raises an exception
---
 modules/textual_inversion/autocrop.py   | 29 ++++++++++++++++-------------
 modules/textual_inversion/preprocess.py |  4 ++--
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'modules/textual_inversion')

diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py
index 1675e39a..051be118 100644
--- a/modules/textual_inversion/autocrop.py
+++ b/modules/textual_inversion/autocrop.py
@@ -3,6 +3,8 @@ import requests
 import os
 import numpy as np
 from PIL import ImageDraw
+from modules import paths_internal
+from pkg_resources import parse_version
 
 GREEN = "#0F0"
 BLUE = "#00F"
@@ -294,22 +296,23 @@ def is_square(w, h):
     return w == h
 
 
-def download_and_cache_models(dirname):
-    download_url = 'https://github.com/opencv/opencv_zoo/blob/91fb0290f50896f38a0ab1e558b74b16bc009428/models/face_detection_yunet/face_detection_yunet_2022mar.onnx?raw=true'
-    model_file_name = 'face_detection_yunet.onnx'
+model_dir_opencv = os.path.join(paths_internal.models_path, 'opencv')
+if parse_version(cv2.__version__) >= parse_version('4.8'):
+    model_file_path = os.path.join(model_dir_opencv, 'face_detection_yunet_2023mar.onnx')
+    model_url = 'https://github.com/opencv/opencv_zoo/blob/b6e370b10f641879a87890d44e42173077154a05/models/face_detection_yunet/face_detection_yunet_2023mar.onnx?raw=true'
+else:
+    model_file_path = os.path.join(model_dir_opencv, 'face_detection_yunet.onnx')
+    model_url = 'https://github.com/opencv/opencv_zoo/blob/91fb0290f50896f38a0ab1e558b74b16bc009428/models/face_detection_yunet/face_detection_yunet_2022mar.onnx?raw=true'
 
-    os.makedirs(dirname, exist_ok=True)
 
-    cache_file = os.path.join(dirname, model_file_name)
-    if not os.path.exists(cache_file):
-        print(f"downloading face detection model from '{download_url}' to '{cache_file}'")
-        response = requests.get(download_url)
-        with open(cache_file, "wb") as f:
+def download_and_cache_models():
+    if not os.path.exists(model_file_path):
+        os.makedirs(model_dir_opencv, exist_ok=True)
+        print(f"downloading face detection model from '{model_url}' to '{model_file_path}'")
+        response = requests.get(model_url)
+        with open(model_file_path, "wb") as f:
             f.write(response.content)
-
-    if os.path.exists(cache_file):
-        return cache_file
-    return None
+    return model_file_path
 
 
 class PointOfInterest:
diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py
index dbd856bd..789fa083 100644
--- a/modules/textual_inversion/preprocess.py
+++ b/modules/textual_inversion/preprocess.py
@@ -3,7 +3,7 @@ from PIL import Image, ImageOps
 import math
 import tqdm
 
-from modules import paths, shared, images, deepbooru
+from modules import shared, images, deepbooru
 from modules.textual_inversion import autocrop
 
 
@@ -196,7 +196,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
 
             dnn_model_path = None
             try:
-                dnn_model_path = autocrop.download_and_cache_models(os.path.join(paths.models_path, "opencv"))
+                dnn_model_path = autocrop.download_and_cache_models()
             except Exception as e:
                 print("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", e)
 
-- 
cgit v1.2.3


From d608926f817b279d16b39a7875beec80d010a988 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Tue, 28 Nov 2023 12:12:27 +0900
Subject: reformat file with uniform indentation

---
 modules/textual_inversion/autocrop.py | 210 +++++++++++++++++-----------------
 1 file changed, 106 insertions(+), 104 deletions(-)

(limited to 'modules/textual_inversion')

diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py
index 051be118..e223a2e0 100644
--- a/modules/textual_inversion/autocrop.py
+++ b/modules/textual_inversion/autocrop.py
@@ -27,7 +27,6 @@ def crop_image(im, settings):
         elif is_portrait(settings.crop_width, settings.crop_height):
             scale_by = settings.crop_height / im.height
 
-
     im = im.resize((int(im.width * scale_by), int(im.height * scale_by)))
     im_debug = im.copy()
 
@@ -71,6 +70,7 @@ def crop_image(im, settings):
 
     return results
 
+
 def focal_point(im, settings):
     corner_points = image_corner_points(im, settings) if settings.corner_points_weight > 0 else []
     entropy_points = image_entropy_points(im, settings) if settings.entropy_points_weight > 0 else []
@@ -80,118 +80,120 @@ def focal_point(im, settings):
 
     weight_pref_total = 0
     if corner_points:
-      weight_pref_total += settings.corner_points_weight
+        weight_pref_total += settings.corner_points_weight
     if entropy_points:
-      weight_pref_total += settings.entropy_points_weight
+        weight_pref_total += settings.entropy_points_weight
     if face_points:
-      weight_pref_total += settings.face_points_weight
+        weight_pref_total += settings.face_points_weight
 
     corner_centroid = None
     if corner_points:
-      corner_centroid = centroid(corner_points)
-      corner_centroid.weight = settings.corner_points_weight / weight_pref_total
-      pois.append(corner_centroid)
+        corner_centroid = centroid(corner_points)
+        corner_centroid.weight = settings.corner_points_weight / weight_pref_total
+        pois.append(corner_centroid)
 
     entropy_centroid = None
     if entropy_points:
-      entropy_centroid = centroid(entropy_points)
-      entropy_centroid.weight = settings.entropy_points_weight / weight_pref_total
-      pois.append(entropy_centroid)
+        entropy_centroid = centroid(entropy_points)
+        entropy_centroid.weight = settings.entropy_points_weight / weight_pref_total
+        pois.append(entropy_centroid)
 
     face_centroid = None
     if face_points:
-      face_centroid = centroid(face_points)
-      face_centroid.weight = settings.face_points_weight / weight_pref_total
-      pois.append(face_centroid)
+        face_centroid = centroid(face_points)
+        face_centroid.weight = settings.face_points_weight / weight_pref_total
+        pois.append(face_centroid)
 
     average_point = poi_average(pois, settings)
 
     if settings.annotate_image:
-      d = ImageDraw.Draw(im)
-      max_size = min(im.width, im.height) * 0.07
-      if corner_centroid is not None:
-        color = BLUE
-        box = corner_centroid.bounding(max_size * corner_centroid.weight)
-        d.text((box[0], box[1]-15), f"Edge: {corner_centroid.weight:.02f}", fill=color)
-        d.ellipse(box, outline=color)
-        if len(corner_points) > 1:
-          for f in corner_points:
-            d.rectangle(f.bounding(4), outline=color)
-      if entropy_centroid is not None:
-        color = "#ff0"
-        box = entropy_centroid.bounding(max_size * entropy_centroid.weight)
-        d.text((box[0], box[1]-15), f"Entropy: {entropy_centroid.weight:.02f}", fill=color)
-        d.ellipse(box, outline=color)
-        if len(entropy_points) > 1:
-          for f in entropy_points:
-            d.rectangle(f.bounding(4), outline=color)
-      if face_centroid is not None:
-        color = RED
-        box = face_centroid.bounding(max_size * face_centroid.weight)
-        d.text((box[0], box[1]-15), f"Face: {face_centroid.weight:.02f}", fill=color)
-        d.ellipse(box, outline=color)
-        if len(face_points) > 1:
-          for f in face_points:
-            d.rectangle(f.bounding(4), outline=color)
-
-      d.ellipse(average_point.bounding(max_size), outline=GREEN)
+        d = ImageDraw.Draw(im)
+        max_size = min(im.width, im.height) * 0.07
+        if corner_centroid is not None:
+            color = BLUE
+            box = corner_centroid.bounding(max_size * corner_centroid.weight)
+            d.text((box[0], box[1] - 15), f"Edge: {corner_centroid.weight:.02f}", fill=color)
+            d.ellipse(box, outline=color)
+            if len(corner_points) > 1:
+                for f in corner_points:
+                    d.rectangle(f.bounding(4), outline=color)
+        if entropy_centroid is not None:
+            color = "#ff0"
+            box = entropy_centroid.bounding(max_size * entropy_centroid.weight)
+            d.text((box[0], box[1] - 15), f"Entropy: {entropy_centroid.weight:.02f}", fill=color)
+            d.ellipse(box, outline=color)
+            if len(entropy_points) > 1:
+                for f in entropy_points:
+                    d.rectangle(f.bounding(4), outline=color)
+        if face_centroid is not None:
+            color = RED
+            box = face_centroid.bounding(max_size * face_centroid.weight)
+            d.text((box[0], box[1] - 15), f"Face: {face_centroid.weight:.02f}", fill=color)
+            d.ellipse(box, outline=color)
+            if len(face_points) > 1:
+                for f in face_points:
+                    d.rectangle(f.bounding(4), outline=color)
+
+        d.ellipse(average_point.bounding(max_size), outline=GREEN)
 
     return average_point
 
 
 def image_face_points(im, settings):
     if settings.dnn_model_path is not None:
-      detector = cv2.FaceDetectorYN.create(
-          settings.dnn_model_path,
-          "",
-          (im.width, im.height),
-          0.9, # score threshold
-          0.3, # nms threshold
-          5000 # keep top k before nms
-      )
-      faces = detector.detect(np.array(im))
-      results = []
-      if faces[1] is not None:
-        for face in faces[1]:
-          x = face[0]
-          y = face[1]
-          w = face[2]
-          h = face[3]
-          results.append(
-            PointOfInterest(
-              int(x + (w * 0.5)), # face focus left/right is center
-              int(y + (h * 0.33)), # face focus up/down is close to the top of the head
-              size = w,
-              weight = 1/len(faces[1])
-            )
-          )
-      return results
+        detector = cv2.FaceDetectorYN.create(
+            settings.dnn_model_path,
+            "",
+            (im.width, im.height),
+            0.9,  # score threshold
+            0.3,  # nms threshold
+            5000  # keep top k before nms
+        )
+        faces = detector.detect(np.array(im))
+        results = []
+        if faces[1] is not None:
+            for face in faces[1]:
+                x = face[0]
+                y = face[1]
+                w = face[2]
+                h = face[3]
+                results.append(
+                    PointOfInterest(
+                        int(x + (w * 0.5)),  # face focus left/right is center
+                        int(y + (h * 0.33)),  # face focus up/down is close to the top of the head
+                        size=w,
+                        weight=1 / len(faces[1])
+                    )
+                )
+        return results
     else:
-      np_im = np.array(im)
-      gray = cv2.cvtColor(np_im, cv2.COLOR_BGR2GRAY)
-
-      tries = [
-        [ f'{cv2.data.haarcascades}haarcascade_eye.xml', 0.01 ],
-        [ f'{cv2.data.haarcascades}haarcascade_frontalface_default.xml', 0.05 ],
-        [ f'{cv2.data.haarcascades}haarcascade_profileface.xml', 0.05 ],
-        [ f'{cv2.data.haarcascades}haarcascade_frontalface_alt.xml', 0.05 ],
-        [ f'{cv2.data.haarcascades}haarcascade_frontalface_alt2.xml', 0.05 ],
-        [ f'{cv2.data.haarcascades}haarcascade_frontalface_alt_tree.xml', 0.05 ],
-        [ f'{cv2.data.haarcascades}haarcascade_eye_tree_eyeglasses.xml', 0.05 ],
-        [ f'{cv2.data.haarcascades}haarcascade_upperbody.xml', 0.05 ]
-      ]
-      for t in tries:
-        classifier = cv2.CascadeClassifier(t[0])
-        minsize = int(min(im.width, im.height) * t[1]) # at least N percent of the smallest side
-        try:
-          faces = classifier.detectMultiScale(gray, scaleFactor=1.1,
-            minNeighbors=7, minSize=(minsize, minsize), flags=cv2.CASCADE_SCALE_IMAGE)
-        except Exception:
-          continue
-
-        if faces:
-          rects = [[f[0], f[1], f[0] + f[2], f[1] + f[3]] for f in faces]
-          return [PointOfInterest((r[0] +r[2]) // 2, (r[1] + r[3]) // 2, size=abs(r[0]-r[2]), weight=1/len(rects)) for r in rects]
+        np_im = np.array(im)
+        gray = cv2.cvtColor(np_im, cv2.COLOR_BGR2GRAY)
+
+        tries = [
+            [f'{cv2.data.haarcascades}haarcascade_eye.xml', 0.01],
+            [f'{cv2.data.haarcascades}haarcascade_frontalface_default.xml', 0.05],
+            [f'{cv2.data.haarcascades}haarcascade_profileface.xml', 0.05],
+            [f'{cv2.data.haarcascades}haarcascade_frontalface_alt.xml', 0.05],
+            [f'{cv2.data.haarcascades}haarcascade_frontalface_alt2.xml', 0.05],
+            [f'{cv2.data.haarcascades}haarcascade_frontalface_alt_tree.xml', 0.05],
+            [f'{cv2.data.haarcascades}haarcascade_eye_tree_eyeglasses.xml', 0.05],
+            [f'{cv2.data.haarcascades}haarcascade_upperbody.xml', 0.05]
+        ]
+        for t in tries:
+            classifier = cv2.CascadeClassifier(t[0])
+            minsize = int(min(im.width, im.height) * t[1])  # at least N percent of the smallest side
+            try:
+                faces = classifier.detectMultiScale(gray, scaleFactor=1.1,
+                                                    minNeighbors=7, minSize=(minsize, minsize),
+                                                    flags=cv2.CASCADE_SCALE_IMAGE)
+            except Exception:
+                continue
+
+            if faces:
+                rects = [[f[0], f[1], f[0] + f[2], f[1] + f[3]] for f in faces]
+                return [PointOfInterest((r[0] + r[2]) // 2, (r[1] + r[3]) // 2, size=abs(r[0] - r[2]),
+                                        weight=1 / len(rects)) for r in rects]
     return []
 
 
@@ -200,7 +202,7 @@ def image_corner_points(im, settings):
 
     # naive attempt at preventing focal points from collecting at watermarks near the bottom
     gd = ImageDraw.Draw(grayscale)
-    gd.rectangle([0, im.height*.9, im.width, im.height], fill="#999")
+    gd.rectangle([0, im.height * .9, im.width, im.height], fill="#999")
 
     np_im = np.array(grayscale)
 
@@ -208,7 +210,7 @@ def image_corner_points(im, settings):
         np_im,
         maxCorners=100,
         qualityLevel=0.04,
-        minDistance=min(grayscale.width, grayscale.height)*0.06,
+        minDistance=min(grayscale.width, grayscale.height) * 0.06,
         useHarrisDetector=False,
     )
 
@@ -217,8 +219,8 @@ def image_corner_points(im, settings):
 
     focal_points = []
     for point in points:
-      x, y = point.ravel()
-      focal_points.append(PointOfInterest(x, y, size=4, weight=1/len(points)))
+        x, y = point.ravel()
+        focal_points.append(PointOfInterest(x, y, size=4, weight=1 / len(points)))
 
     return focal_points
 
@@ -227,13 +229,13 @@ def image_entropy_points(im, settings):
     landscape = im.height < im.width
     portrait = im.height > im.width
     if landscape:
-      move_idx = [0, 2]
-      move_max = im.size[0]
+        move_idx = [0, 2]
+        move_max = im.size[0]
     elif portrait:
-      move_idx = [1, 3]
-      move_max = im.size[1]
+        move_idx = [1, 3]
+        move_max = im.size[1]
     else:
-      return []
+        return []
 
     e_max = 0
     crop_current = [0, 0, settings.crop_width, settings.crop_height]
@@ -243,14 +245,14 @@ def image_entropy_points(im, settings):
         e = image_entropy(crop)
 
         if (e > e_max):
-          e_max = e
-          crop_best = list(crop_current)
+            e_max = e
+            crop_best = list(crop_current)
 
         crop_current[move_idx[0]] += 4
         crop_current[move_idx[1]] += 4
 
-    x_mid = int(crop_best[0] + settings.crop_width/2)
-    y_mid = int(crop_best[1] + settings.crop_height/2)
+    x_mid = int(crop_best[0] + settings.crop_width / 2)
+    y_mid = int(crop_best[1] + settings.crop_height / 2)
 
     return [PointOfInterest(x_mid, y_mid, size=25, weight=1.0)]
 
-- 
cgit v1.2.3


From 11d23e8ca55c097ecfa255a05b63f194e25f08be Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 2 Dec 2023 18:01:11 +0300
Subject: remove Train/Preprocessing tab and put all its functionality into
 extras batch images mode

---
 javascript/ui.js                                |  17 ++
 modules/api/api.py                              |  15 --
 modules/api/models.py                           |   3 -
 modules/postprocessing.py                       |  92 +++++++---
 modules/scripts_postprocessing.py               |  86 ++++++++-
 modules/shared_options.py                       |   1 +
 modules/textual_inversion/preprocess.py         | 232 ------------------------
 modules/textual_inversion/ui.py                 |   7 -
 modules/ui.py                                   | 107 -----------
 modules/ui_postprocessing.py                    |  16 +-
 modules/ui_toprow.py                            |   6 +-
 scripts/postprocessing_caption.py               |  30 +++
 scripts/postprocessing_codeformer.py            |  16 +-
 scripts/postprocessing_create_flipped_copies.py |  32 ++++
 scripts/postprocessing_focal_crop.py            |  54 ++++++
 scripts/postprocessing_gfpgan.py                |  13 +-
 scripts/postprocessing_split_oversized.py       |  71 ++++++++
 scripts/postprocessing_upscale.py               |  12 ++
 scripts/processing_autosized_crop.py            |  64 +++++++
 19 files changed, 460 insertions(+), 414 deletions(-)
 delete mode 100644 modules/textual_inversion/preprocess.py
 create mode 100644 scripts/postprocessing_caption.py
 create mode 100644 scripts/postprocessing_create_flipped_copies.py
 create mode 100644 scripts/postprocessing_focal_crop.py
 create mode 100644 scripts/postprocessing_split_oversized.py
 create mode 100644 scripts/processing_autosized_crop.py

(limited to 'modules/textual_inversion')

diff --git a/javascript/ui.js b/javascript/ui.js
index 2e262602..410fc44e 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -170,6 +170,23 @@ function submit_img2img() {
     return res;
 }
 
+function submit_extras() {
+    showSubmitButtons('extras', false);
+
+    var id = randomId();
+
+    requestProgress(id, gradioApp().getElementById('extras_gallery_container'), gradioApp().getElementById('extras_gallery'), function() {
+        showSubmitButtons('extras', true);
+    });
+
+    var res = create_submit_args(arguments);
+
+    res[0] = id;
+
+    console.log(res);
+    return res;
+}
+
 function restoreProgressTxt2img() {
     showRestoreProgressButton("txt2img", false);
     var id = localGet("txt2img_task_id");
diff --git a/modules/api/api.py b/modules/api/api.py
index 09083874..b3d74e51 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -22,7 +22,6 @@ from modules.api import models
 from modules.shared import opts
 from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
 from modules.textual_inversion.textual_inversion import create_embedding, train_embedding
-from modules.textual_inversion.preprocess import preprocess
 from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
 from PIL import PngImagePlugin, Image
 from modules.sd_models_config import find_checkpoint_config_near_filename
@@ -235,7 +234,6 @@ class Api:
         self.add_api_route("/sdapi/v1/refresh-vae", self.refresh_vae, methods=["POST"])
         self.add_api_route("/sdapi/v1/create/embedding", self.create_embedding, methods=["POST"], response_model=models.CreateResponse)
         self.add_api_route("/sdapi/v1/create/hypernetwork", self.create_hypernetwork, methods=["POST"], response_model=models.CreateResponse)
-        self.add_api_route("/sdapi/v1/preprocess", self.preprocess, methods=["POST"], response_model=models.PreprocessResponse)
         self.add_api_route("/sdapi/v1/train/embedding", self.train_embedding, methods=["POST"], response_model=models.TrainResponse)
         self.add_api_route("/sdapi/v1/train/hypernetwork", self.train_hypernetwork, methods=["POST"], response_model=models.TrainResponse)
         self.add_api_route("/sdapi/v1/memory", self.get_memory, methods=["GET"], response_model=models.MemoryResponse)
@@ -675,19 +673,6 @@ class Api:
         finally:
             shared.state.end()
 
-    def preprocess(self, args: dict):
-        try:
-            shared.state.begin(job="preprocess")
-            preprocess(**args) # quick operation unless blip/booru interrogation is enabled
-            shared.state.end()
-            return models.PreprocessResponse(info='preprocess complete')
-        except KeyError as e:
-            return models.PreprocessResponse(info=f"preprocess error: invalid token: {e}")
-        except Exception as e:
-            return models.PreprocessResponse(info=f"preprocess error: {e}")
-        finally:
-            shared.state.end()
-
     def train_embedding(self, args: dict):
         try:
             shared.state.begin(job="train_embedding")
diff --git a/modules/api/models.py b/modules/api/models.py
index a0d80af8..33894b3e 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -202,9 +202,6 @@ class TrainResponse(BaseModel):
 class CreateResponse(BaseModel):
     info: str = Field(title="Create info", description="Response string from create embedding or hypernetwork task.")
 
-class PreprocessResponse(BaseModel):
-    info: str = Field(title="Preprocess info", description="Response string from preprocessing task.")
-
 fields = {}
 for key, metadata in opts.data_labels.items():
     value = opts.data.get(key)
diff --git a/modules/postprocessing.py b/modules/postprocessing.py
index 0a134ee4..3c85a74c 100644
--- a/modules/postprocessing.py
+++ b/modules/postprocessing.py
@@ -6,7 +6,7 @@ from modules import shared, images, devices, scripts, scripts_postprocessing, ui
 from modules.shared import opts
 
 
-def run_postprocessing(extras_mode, image, image_folder, input_dir, output_dir, show_extras_results, *args, save_output: bool = True):
+def run_postprocessing(id_task, extras_mode, image, image_folder, input_dir, output_dir, show_extras_results, *args, save_output: bool = True):
     devices.torch_gc()
 
     shared.state.begin(job="extras")
@@ -29,11 +29,7 @@ def run_postprocessing(extras_mode, image, image_folder, input_dir, output_dir,
 
             image_list = shared.listfiles(input_dir)
             for filename in image_list:
-                try:
-                    image = Image.open(filename)
-                except Exception:
-                    continue
-                yield image, filename
+                yield filename, filename
         else:
             assert image, 'image not selected'
             yield image, None
@@ -45,37 +41,85 @@ def run_postprocessing(extras_mode, image, image_folder, input_dir, output_dir,
 
     infotext = ''
 
-    for image_data, name in get_images(extras_mode, image, image_folder, input_dir):
+    data_to_process = list(get_images(extras_mode, image, image_folder, input_dir))
+    shared.state.job_count = len(data_to_process)
+
+    for image_placeholder, name in data_to_process:
         image_data: Image.Image
 
+        shared.state.nextjob()
         shared.state.textinfo = name
+        shared.state.skipped = False
+
+        if shared.state.interrupted:
+            break
+
+        if isinstance(image_placeholder, str):
+            try:
+                image_data = Image.open(image_placeholder)
+            except Exception:
+                continue
+        else:
+            image_data = image_placeholder
+
+        shared.state.assign_current_image(image_data)
 
         parameters, existing_pnginfo = images.read_info_from_image(image_data)
         if parameters:
             existing_pnginfo["parameters"] = parameters
 
-        pp = scripts_postprocessing.PostprocessedImage(image_data.convert("RGB"))
+        initial_pp = scripts_postprocessing.PostprocessedImage(image_data.convert("RGB"))
 
-        scripts.scripts_postproc.run(pp, args)
+        scripts.scripts_postproc.run(initial_pp, args)
 
-        if opts.use_original_name_batch and name is not None:
-            basename = os.path.splitext(os.path.basename(name))[0]
-            forced_filename = basename
-        else:
-            basename = ''
-            forced_filename = None
+        if shared.state.skipped:
+            continue
+
+        used_suffixes = {}
+        for pp in [initial_pp, *initial_pp.extra_images]:
+            suffix = pp.get_suffix(used_suffixes)
+
+            if opts.use_original_name_batch and name is not None:
+                basename = os.path.splitext(os.path.basename(name))[0]
+                forced_filename = basename + suffix
+            else:
+                basename = ''
+                forced_filename = None
+
+            infotext = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in pp.info.items() if v is not None])
+
+            if opts.enable_pnginfo:
+                pp.image.info = existing_pnginfo
+                pp.image.info["postprocessing"] = infotext
+
+            if save_output:
+                fullfn, _ = images.save_image(pp.image, path=outpath, basename=basename, extension=opts.samples_format, info=infotext, short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=forced_filename, suffix=suffix)
 
-        infotext = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in pp.info.items() if v is not None])
+                if pp.caption:
+                    caption_filename = os.path.splitext(fullfn)[0] + ".txt"
+                    if os.path.isfile(caption_filename):
+                        with open(caption_filename, encoding="utf8") as file:
+                            existing_caption = file.read().strip()
+                    else:
+                        existing_caption = ""
 
-        if opts.enable_pnginfo:
-            pp.image.info = existing_pnginfo
-            pp.image.info["postprocessing"] = infotext
+                    action = shared.opts.postprocessing_existing_caption_action
+                    if action == 'Prepend' and existing_caption:
+                        caption = f"{existing_caption} {pp.caption}"
+                    elif action == 'Append' and existing_caption:
+                        caption = f"{pp.caption} {existing_caption}"
+                    elif action == 'Keep' and existing_caption:
+                        caption = existing_caption
+                    else:
+                        caption = pp.caption
 
-        if save_output:
-            images.save_image(pp.image, path=outpath, basename=basename, extension=opts.samples_format, info=infotext, short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=forced_filename)
+                    caption = caption.strip()
+                    if caption:
+                        with open(caption_filename, "w", encoding="utf8") as file:
+                            file.write(caption)
 
-        if extras_mode != 2 or show_extras_results:
-            outputs.append(pp.image)
+            if extras_mode != 2 or show_extras_results:
+                outputs.append(pp.image)
 
         image_data.close()
 
@@ -99,9 +143,11 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
             "upscaler_2_visibility": extras_upscaler_2_visibility,
         },
         "GFPGAN": {
+            "enable": True,
             "gfpgan_visibility": gfpgan_visibility,
         },
         "CodeFormer": {
+            "enable": True,
             "codeformer_visibility": codeformer_visibility,
             "codeformer_weight": codeformer_weight,
         },
diff --git a/modules/scripts_postprocessing.py b/modules/scripts_postprocessing.py
index bac1335d..901cad08 100644
--- a/modules/scripts_postprocessing.py
+++ b/modules/scripts_postprocessing.py
@@ -1,13 +1,56 @@
+import dataclasses
 import os
 import gradio as gr
 
 from modules import errors, shared
 
 
+@dataclasses.dataclass
+class PostprocessedImageSharedInfo:
+    target_width: int = None
+    target_height: int = None
+
+
 class PostprocessedImage:
     def __init__(self, image):
         self.image = image
         self.info = {}
+        self.shared = PostprocessedImageSharedInfo()
+        self.extra_images = []
+        self.nametags = []
+        self.disable_processing = False
+        self.caption = None
+
+    def get_suffix(self, used_suffixes=None):
+        used_suffixes = {} if used_suffixes is None else used_suffixes
+        suffix = "-".join(self.nametags)
+        if suffix:
+            suffix = "-" + suffix
+
+        if suffix not in used_suffixes:
+            used_suffixes[suffix] = 1
+            return suffix
+
+        for i in range(1, 100):
+            proposed_suffix = suffix + "-" + str(i)
+
+            if proposed_suffix not in used_suffixes:
+                used_suffixes[proposed_suffix] = 1
+                return proposed_suffix
+
+        return suffix
+
+    def create_copy(self, new_image, *, nametags=None, disable_processing=False):
+        pp = PostprocessedImage(new_image)
+        pp.shared = self.shared
+        pp.nametags = self.nametags.copy()
+        pp.info = self.info.copy()
+        pp.disable_processing = disable_processing
+
+        if nametags is not None:
+            pp.nametags += nametags
+
+        return pp
 
 
 class ScriptPostprocessing:
@@ -42,10 +85,17 @@ class ScriptPostprocessing:
 
         pass
 
-    def image_changed(self):
-        pass
+    def process_firstpass(self, pp: PostprocessedImage, **args):
+        """
+        Called for all scripts before calling process(). Scripts can examine the image here and set fields
+        of the pp object to communicate things to other scripts.
+        args contains a dictionary with all values returned by components from ui()
+        """
 
+        pass
 
+    def image_changed(self):
+        pass
 
 
 def wrap_call(func, filename, funcname, *args, default=None, **kwargs):
@@ -118,16 +168,42 @@ class ScriptPostprocessingRunner:
         return inputs
 
     def run(self, pp: PostprocessedImage, args):
-        for script in self.scripts_in_preferred_order():
-            shared.state.job = script.name
+        scripts = []
 
+        for script in self.scripts_in_preferred_order():
             script_args = args[script.args_from:script.args_to]
 
             process_args = {}
             for (name, _component), value in zip(script.controls.items(), script_args):
                 process_args[name] = value
 
-            script.process(pp, **process_args)
+            scripts.append((script, process_args))
+
+        for script, process_args in scripts:
+            script.process_firstpass(pp, **process_args)
+
+        all_images = [pp]
+
+        for script, process_args in scripts:
+            if shared.state.skipped:
+                break
+
+            shared.state.job = script.name
+
+            for single_image in all_images.copy():
+
+                if not single_image.disable_processing:
+                    script.process(single_image, **process_args)
+
+                for extra_image in single_image.extra_images:
+                    if not isinstance(extra_image, PostprocessedImage):
+                        extra_image = single_image.create_copy(extra_image)
+
+                    all_images.append(extra_image)
+
+                single_image.extra_images.clear()
+
+        pp.extra_images = all_images[1:]
 
     def create_args_for_run(self, scripts_args):
         if not self.ui_created:
diff --git a/modules/shared_options.py b/modules/shared_options.py
index d8a27180..859dee40 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -357,6 +357,7 @@ options_templates.update(options_section(('postprocessing', "Postprocessing", "p
     'postprocessing_enable_in_main_ui': OptionInfo([], "Enable postprocessing operations in txt2img and img2img tabs", ui_components.DropdownMulti, lambda: {"choices": [x.name for x in shared_items.postprocessing_scripts()]}),
     'postprocessing_operation_order': OptionInfo([], "Postprocessing operation order", ui_components.DropdownMulti, lambda: {"choices": [x.name for x in shared_items.postprocessing_scripts()]}),
     'upscaling_max_images_in_cache': OptionInfo(5, "Maximum number of images in upscaling cache", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
+    'postprocessing_existing_caption_action': OptionInfo("Ignore", "Action for existing captions", gr.Radio, {"choices": ["Ignore", "Keep", "Prepend", "Append"]}).info("when generating captions using postprocessing; Ignore = use generated; Keep = use original; Prepend/Append = combine both"),
 }))
 
 options_templates.update(options_section((None, "Hidden options"), {
diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py
deleted file mode 100644
index 789fa083..00000000
--- a/modules/textual_inversion/preprocess.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import os
-from PIL import Image, ImageOps
-import math
-import tqdm
-
-from modules import shared, images, deepbooru
-from modules.textual_inversion import autocrop
-
-
-def preprocess(id_task, process_src, process_dst, process_width, process_height, preprocess_txt_action, process_keep_original_size, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.15, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False, process_multicrop=None, process_multicrop_mindim=None, process_multicrop_maxdim=None, process_multicrop_minarea=None, process_multicrop_maxarea=None, process_multicrop_objective=None, process_multicrop_threshold=None):
-    try:
-        if process_caption:
-            shared.interrogator.load()
-
-        if process_caption_deepbooru:
-            deepbooru.model.start()
-
-        preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_keep_original_size, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug, process_multicrop, process_multicrop_mindim, process_multicrop_maxdim, process_multicrop_minarea, process_multicrop_maxarea, process_multicrop_objective, process_multicrop_threshold)
-
-    finally:
-
-        if process_caption:
-            shared.interrogator.send_blip_to_ram()
-
-        if process_caption_deepbooru:
-            deepbooru.model.stop()
-
-
-def listfiles(dirname):
-    return os.listdir(dirname)
-
-
-class PreprocessParams:
-    src = None
-    dstdir = None
-    subindex = 0
-    flip = False
-    process_caption = False
-    process_caption_deepbooru = False
-    preprocess_txt_action = None
-
-
-def save_pic_with_caption(image, index, params: PreprocessParams, existing_caption=None):
-    caption = ""
-
-    if params.process_caption:
-        caption += shared.interrogator.generate_caption(image)
-
-    if params.process_caption_deepbooru:
-        if caption:
-            caption += ", "
-        caption += deepbooru.model.tag_multi(image)
-
-    filename_part = params.src
-    filename_part = os.path.splitext(filename_part)[0]
-    filename_part = os.path.basename(filename_part)
-
-    basename = f"{index:05}-{params.subindex}-{filename_part}"
-    image.save(os.path.join(params.dstdir, f"{basename}.png"))
-
-    if params.preprocess_txt_action == 'prepend' and existing_caption:
-        caption = f"{existing_caption} {caption}"
-    elif params.preprocess_txt_action == 'append' and existing_caption:
-        caption = f"{caption} {existing_caption}"
-    elif params.preprocess_txt_action == 'copy' and existing_caption:
-        caption = existing_caption
-
-    caption = caption.strip()
-
-    if caption:
-        with open(os.path.join(params.dstdir, f"{basename}.txt"), "w", encoding="utf8") as file:
-            file.write(caption)
-
-    params.subindex += 1
-
-
-def save_pic(image, index, params, existing_caption=None):
-    save_pic_with_caption(image, index, params, existing_caption=existing_caption)
-
-    if params.flip:
-        save_pic_with_caption(ImageOps.mirror(image), index, params, existing_caption=existing_caption)
-
-
-def split_pic(image, inverse_xy, width, height, overlap_ratio):
-    if inverse_xy:
-        from_w, from_h = image.height, image.width
-        to_w, to_h = height, width
-    else:
-        from_w, from_h = image.width, image.height
-        to_w, to_h = width, height
-    h = from_h * to_w // from_w
-    if inverse_xy:
-        image = image.resize((h, to_w))
-    else:
-        image = image.resize((to_w, h))
-
-    split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
-    y_step = (h - to_h) / (split_count - 1)
-    for i in range(split_count):
-        y = int(y_step * i)
-        if inverse_xy:
-            splitted = image.crop((y, 0, y + to_h, to_w))
-        else:
-            splitted = image.crop((0, y, to_w, y + to_h))
-        yield splitted
-
-# not using torchvision.transforms.CenterCrop because it doesn't allow float regions
-def center_crop(image: Image, w: int, h: int):
-    iw, ih = image.size
-    if ih / h < iw / w:
-        sw = w * ih / h
-        box = (iw - sw) / 2, 0, iw - (iw - sw) / 2, ih
-    else:
-        sh = h * iw / w
-        box = 0, (ih - sh) / 2, iw, ih - (ih - sh) / 2
-    return image.resize((w, h), Image.Resampling.LANCZOS, box)
-
-
-def multicrop_pic(image: Image, mindim, maxdim, minarea, maxarea, objective, threshold):
-    iw, ih = image.size
-    err = lambda w, h: 1-(lambda x: x if x < 1 else 1/x)(iw/ih/(w/h))
-    wh = max(((w, h) for w in range(mindim, maxdim+1, 64) for h in range(mindim, maxdim+1, 64)
-        if minarea <= w * h <= maxarea and err(w, h) <= threshold),
-        key= lambda wh: (wh[0]*wh[1], -err(*wh))[::1 if objective=='Maximize area' else -1],
-        default=None
-    )
-    return wh and center_crop(image, *wh)
-
-
-def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_keep_original_size, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False, process_multicrop=None, process_multicrop_mindim=None, process_multicrop_maxdim=None, process_multicrop_minarea=None, process_multicrop_maxarea=None, process_multicrop_objective=None, process_multicrop_threshold=None):
-    width = process_width
-    height = process_height
-    src = os.path.abspath(process_src)
-    dst = os.path.abspath(process_dst)
-    split_threshold = max(0.0, min(1.0, split_threshold))
-    overlap_ratio = max(0.0, min(0.9, overlap_ratio))
-
-    assert src != dst, 'same directory specified as source and destination'
-
-    os.makedirs(dst, exist_ok=True)
-
-    files = listfiles(src)
-
-    shared.state.job = "preprocess"
-    shared.state.textinfo = "Preprocessing..."
-    shared.state.job_count = len(files)
-
-    params = PreprocessParams()
-    params.dstdir = dst
-    params.flip = process_flip
-    params.process_caption = process_caption
-    params.process_caption_deepbooru = process_caption_deepbooru
-    params.preprocess_txt_action = preprocess_txt_action
-
-    pbar = tqdm.tqdm(files)
-    for index, imagefile in enumerate(pbar):
-        params.subindex = 0
-        filename = os.path.join(src, imagefile)
-        try:
-            img = Image.open(filename)
-            img = ImageOps.exif_transpose(img)
-            img = img.convert("RGB")
-        except Exception:
-            continue
-
-        description = f"Preprocessing [Image {index}/{len(files)}]"
-        pbar.set_description(description)
-        shared.state.textinfo = description
-
-        params.src = filename
-
-        existing_caption = None
-        existing_caption_filename = f"{os.path.splitext(filename)[0]}.txt"
-        if os.path.exists(existing_caption_filename):
-            with open(existing_caption_filename, 'r', encoding="utf8") as file:
-                existing_caption = file.read()
-
-        if shared.state.interrupted:
-            break
-
-        if img.height > img.width:
-            ratio = (img.width * height) / (img.height * width)
-            inverse_xy = False
-        else:
-            ratio = (img.height * width) / (img.width * height)
-            inverse_xy = True
-
-        process_default_resize = True
-
-        if process_split and ratio < 1.0 and ratio <= split_threshold:
-            for splitted in split_pic(img, inverse_xy, width, height, overlap_ratio):
-                save_pic(splitted, index, params, existing_caption=existing_caption)
-            process_default_resize = False
-
-        if process_focal_crop and img.height != img.width:
-
-            dnn_model_path = None
-            try:
-                dnn_model_path = autocrop.download_and_cache_models()
-            except Exception as e:
-                print("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", e)
-
-            autocrop_settings = autocrop.Settings(
-                crop_width = width,
-                crop_height = height,
-                face_points_weight = process_focal_crop_face_weight,
-                entropy_points_weight = process_focal_crop_entropy_weight,
-                corner_points_weight = process_focal_crop_edges_weight,
-                annotate_image = process_focal_crop_debug,
-                dnn_model_path = dnn_model_path,
-            )
-            for focal in autocrop.crop_image(img, autocrop_settings):
-                save_pic(focal, index, params, existing_caption=existing_caption)
-            process_default_resize = False
-
-        if process_multicrop:
-            cropped = multicrop_pic(img, process_multicrop_mindim, process_multicrop_maxdim, process_multicrop_minarea, process_multicrop_maxarea, process_multicrop_objective, process_multicrop_threshold)
-            if cropped is not None:
-                save_pic(cropped, index, params, existing_caption=existing_caption)
-            else:
-                print(f"skipped {img.width}x{img.height} image {filename} (can't find suitable size within error threshold)")
-            process_default_resize = False
-
-        if process_keep_original_size:
-            save_pic(img, index, params, existing_caption=existing_caption)
-            process_default_resize = False
-
-        if process_default_resize:
-            img = images.resize_image(1, img, width, height)
-            save_pic(img, index, params, existing_caption=existing_caption)
-
-        shared.state.nextjob()
diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py
index 35c4feef..f149ad1f 100644
--- a/modules/textual_inversion/ui.py
+++ b/modules/textual_inversion/ui.py
@@ -3,7 +3,6 @@ import html
 import gradio as gr
 
 import modules.textual_inversion.textual_inversion
-import modules.textual_inversion.preprocess
 from modules import sd_hijack, shared
 
 
@@ -15,12 +14,6 @@ def create_embedding(name, initialization_text, nvpt, overwrite_old):
     return gr.Dropdown.update(choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())), f"Created: {filename}", ""
 
 
-def preprocess(*args):
-    modules.textual_inversion.preprocess.preprocess(*args)
-
-    return f"Preprocessing {'interrupted' if shared.state.interrupted else 'finished'}.", ""
-
-
 def train_embedding(*args):
 
     assert not shared.cmd_opts.lowvram, 'Training models with lowvram not possible'
diff --git a/modules/ui.py b/modules/ui.py
index 08e0ad77..d80486dd 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -912,71 +912,6 @@ def create_ui():
                         with gr.Column():
                             create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary', elem_id="train_create_hypernetwork")
 
-                with gr.Tab(label="Preprocess images", id="preprocess_images"):
-                    process_src = gr.Textbox(label='Source directory', elem_id="train_process_src")
-                    process_dst = gr.Textbox(label='Destination directory', elem_id="train_process_dst")
-                    process_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="train_process_width")
-                    process_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="train_process_height")
-                    preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"], elem_id="train_preprocess_txt_action")
-
-                    with gr.Row():
-                        process_keep_original_size = gr.Checkbox(label='Keep original size', elem_id="train_process_keep_original_size")
-                        process_flip = gr.Checkbox(label='Create flipped copies', elem_id="train_process_flip")
-                        process_split = gr.Checkbox(label='Split oversized images', elem_id="train_process_split")
-                        process_focal_crop = gr.Checkbox(label='Auto focal point crop', elem_id="train_process_focal_crop")
-                        process_multicrop = gr.Checkbox(label='Auto-sized crop', elem_id="train_process_multicrop")
-                        process_caption = gr.Checkbox(label='Use BLIP for caption', elem_id="train_process_caption")
-                        process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True, elem_id="train_process_caption_deepbooru")
-
-                    with gr.Row(visible=False) as process_split_extra_row:
-                        process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_split_threshold")
-                        process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05, elem_id="train_process_overlap_ratio")
-
-                    with gr.Row(visible=False) as process_focal_crop_row:
-                        process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_face_weight")
-                        process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_entropy_weight")
-                        process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_edges_weight")
-                        process_focal_crop_debug = gr.Checkbox(label='Create debug image', elem_id="train_process_focal_crop_debug")
-
-                    with gr.Column(visible=False) as process_multicrop_col:
-                        gr.Markdown('Each image is center-cropped with an automatically chosen width and height.')
-                        with gr.Row():
-                            process_multicrop_mindim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension lower bound", value=384, elem_id="train_process_multicrop_mindim")
-                            process_multicrop_maxdim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension upper bound", value=768, elem_id="train_process_multicrop_maxdim")
-                        with gr.Row():
-                            process_multicrop_minarea = gr.Slider(minimum=64*64, maximum=2048*2048, step=1, label="Area lower bound", value=64*64, elem_id="train_process_multicrop_minarea")
-                            process_multicrop_maxarea = gr.Slider(minimum=64*64, maximum=2048*2048, step=1, label="Area upper bound", value=640*640, elem_id="train_process_multicrop_maxarea")
-                        with gr.Row():
-                            process_multicrop_objective = gr.Radio(["Maximize area", "Minimize error"], value="Maximize area", label="Resizing objective", elem_id="train_process_multicrop_objective")
-                            process_multicrop_threshold = gr.Slider(minimum=0, maximum=1, step=0.01, label="Error threshold", value=0.1, elem_id="train_process_multicrop_threshold")
-
-                    with gr.Row():
-                        with gr.Column(scale=3):
-                            gr.HTML(value="")
-
-                        with gr.Column():
-                            with gr.Row():
-                                interrupt_preprocessing = gr.Button("Interrupt", elem_id="train_interrupt_preprocessing")
-                            run_preprocess = gr.Button(value="Preprocess", variant='primary', elem_id="train_run_preprocess")
-
-                    process_split.change(
-                        fn=lambda show: gr_show(show),
-                        inputs=[process_split],
-                        outputs=[process_split_extra_row],
-                    )
-
-                    process_focal_crop.change(
-                        fn=lambda show: gr_show(show),
-                        inputs=[process_focal_crop],
-                        outputs=[process_focal_crop_row],
-                    )
-
-                    process_multicrop.change(
-                        fn=lambda show: gr_show(show),
-                        inputs=[process_multicrop],
-                        outputs=[process_multicrop_col],
-                    )
-
                 def get_textual_inversion_template_names():
                     return sorted(textual_inversion.textual_inversion_templates)
 
@@ -1077,42 +1012,6 @@ def create_ui():
             ]
         )
 
-        run_preprocess.click(
-            fn=wrap_gradio_gpu_call(textual_inversion_ui.preprocess, extra_outputs=[gr.update()]),
-            _js="start_training_textual_inversion",
-            inputs=[
-                dummy_component,
-                process_src,
-                process_dst,
-                process_width,
-                process_height,
-                preprocess_txt_action,
-                process_keep_original_size,
-                process_flip,
-                process_split,
-                process_caption,
-                process_caption_deepbooru,
-                process_split_threshold,
-                process_overlap_ratio,
-                process_focal_crop,
-                process_focal_crop_face_weight,
-                process_focal_crop_entropy_weight,
-                process_focal_crop_edges_weight,
-                process_focal_crop_debug,
-                process_multicrop,
-                process_multicrop_mindim,
-                process_multicrop_maxdim,
-                process_multicrop_minarea,
-                process_multicrop_maxarea,
-                process_multicrop_objective,
-                process_multicrop_threshold,
-            ],
-            outputs=[
-                ti_output,
-                ti_outcome,
-            ],
-        )
-
         train_embedding.click(
             fn=wrap_gradio_gpu_call(textual_inversion_ui.train_embedding, extra_outputs=[gr.update()]),
             _js="start_training_textual_inversion",
@@ -1186,12 +1085,6 @@ def create_ui():
             outputs=[],
         )
 
-        interrupt_preprocessing.click(
-            fn=lambda: shared.state.interrupt(),
-            inputs=[],
-            outputs=[],
-        )
-
     loadsave = ui_loadsave.UiLoadsave(cmd_opts.ui_config_file)
 
     settings = ui_settings.UiSettings()
diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py
index 802e1ce7..fbad0800 100644
--- a/modules/ui_postprocessing.py
+++ b/modules/ui_postprocessing.py
@@ -1,9 +1,10 @@
 import gradio as gr
-from modules import scripts, shared, ui_common, postprocessing, call_queue
+from modules import scripts, shared, ui_common, postprocessing, call_queue, ui_toprow
 import modules.generation_parameters_copypaste as parameters_copypaste
 
 
 def create_ui():
+    dummy_component = gr.Label(visible=False)
     tab_index = gr.State(value=0)
 
     with gr.Row(equal_height=False, variant='compact'):
@@ -20,11 +21,13 @@ def create_ui():
                     extras_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, placeholder="Leave blank to save images to the default path.", elem_id="extras_batch_output_dir")
                     show_extras_results = gr.Checkbox(label='Show result images', value=True, elem_id="extras_show_extras_results")
 
-            submit = gr.Button('Generate', elem_id="extras_generate", variant='primary')
-
             script_inputs = scripts.scripts_postproc.setup_ui()
 
         with gr.Column():
+            toprow = ui_toprow.Toprow(is_compact=True, is_img2img=False, id_part="extras")
+            toprow.create_inline_toprow_image()
+            submit = toprow.submit
+
             result_images, html_info_x, html_info, html_log = ui_common.create_output_panel("extras", shared.opts.outdir_extras_samples)
 
     tab_single.select(fn=lambda: 0, inputs=[], outputs=[tab_index])
@@ -33,7 +36,9 @@ def create_ui():
 
     submit.click(
         fn=call_queue.wrap_gradio_gpu_call(postprocessing.run_postprocessing, extra_outputs=[None, '']),
+        _js="submit_extras",
         inputs=[
+            dummy_component,
             tab_index,
             extras_image,
             image_batch,
@@ -45,8 +50,9 @@ def create_ui():
         outputs=[
             result_images,
             html_info_x,
-            html_info,
-        ]
+            html_log,
+        ],
+        show_progress=False,
     )
 
     parameters_copypaste.add_paste_fields("extras", extras_image, None)
diff --git a/modules/ui_toprow.py b/modules/ui_toprow.py
index 985b5a2d..88838f97 100644
--- a/modules/ui_toprow.py
+++ b/modules/ui_toprow.py
@@ -34,8 +34,10 @@ class Toprow:
 
     submit_box = None
 
-    def __init__(self, is_img2img, is_compact=False):
-        id_part = "img2img" if is_img2img else "txt2img"
+    def __init__(self, is_img2img, is_compact=False, id_part=None):
+        if id_part is None:
+            id_part = "img2img" if is_img2img else "txt2img"
+
         self.id_part = id_part
         self.is_img2img = is_img2img
         self.is_compact = is_compact
diff --git a/scripts/postprocessing_caption.py b/scripts/postprocessing_caption.py
new file mode 100644
index 00000000..243e3ad9
--- /dev/null
+++ b/scripts/postprocessing_caption.py
@@ -0,0 +1,35 @@
+from modules import scripts_postprocessing, ui_components, deepbooru, shared
+import gradio as gr
+
+
+class ScriptPostprocessingCeption(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that adds automatically generated captions to pp.caption."""
+
+    name = "Caption"
+    order = 4000
+
+    def ui(self):
+        """Build the accordion; returns the enable toggle and captioner selection as "enable" and "option"."""
+        with ui_components.InputAccordion(False, label="Caption") as enable:
+            option = gr.CheckboxGroup(value=["Deepbooru"], choices=["Deepbooru", "BLIP"], show_label=False)
+
+        return dict(enable=enable, option=option)
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, option):
+        """Extend pp.caption with the output of every captioner selected in option.
+
+        Does nothing unless enable is truthy. The existing caption comes first and
+        empty pieces are dropped before everything is joined with ", ".
+        """
+        if not enable:
+            return
+
+        pieces = [pp.caption]
+
+        if "Deepbooru" in option:
+            pieces.append(deepbooru.model.tag(pp.image))
+
+        if "BLIP" in option:
+            pieces.append(shared.interrogator.generate_caption(pp.image))
+
+        pp.caption = ", ".join(piece for piece in pieces if piece)
diff --git a/scripts/postprocessing_codeformer.py b/scripts/postprocessing_codeformer.py
index a7d80d40..e1e156dd 100644
--- a/scripts/postprocessing_codeformer.py
+++ b/scripts/postprocessing_codeformer.py
@@ -1,28 +1,28 @@
 from PIL import Image
 import numpy as np
 
-from modules import scripts_postprocessing, codeformer_model
+from modules import scripts_postprocessing, codeformer_model, ui_components
 import gradio as gr
 
-from modules.ui_components import FormRow
-
 
 class ScriptPostprocessingCodeFormer(scripts_postprocessing.ScriptPostprocessing):
     name = "CodeFormer"
     order = 3000
 
     def ui(self):
-        with FormRow():
-            codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer visibility", value=0, elem_id="extras_codeformer_visibility")
-            codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer weight (0 = maximum effect, 1 = minimum effect)", value=0, elem_id="extras_codeformer_weight")
+        with ui_components.InputAccordion(False, label="CodeFormer") as enable:
+            with gr.Row():
+                codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Visibility", value=1.0, elem_id="extras_codeformer_visibility")
+                codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Weight (0 = maximum effect, 1 = minimum effect)", value=0, elem_id="extras_codeformer_weight")
 
         return {
+            "enable": enable,
             "codeformer_visibility": codeformer_visibility,
             "codeformer_weight": codeformer_weight,
         }
 
-    def process(self, pp: scripts_postprocessing.PostprocessedImage, codeformer_visibility, codeformer_weight):
-        if codeformer_visibility == 0:
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, codeformer_visibility, codeformer_weight):
+        if codeformer_visibility == 0 or not enable:
             return
 
         restored_img = codeformer_model.codeformer.restore(np.array(pp.image, dtype=np.uint8), w=codeformer_weight)
diff --git a/scripts/postprocessing_create_flipped_copies.py b/scripts/postprocessing_create_flipped_copies.py
new file mode 100644
index 00000000..3425571d
--- /dev/null
+++ b/scripts/postprocessing_create_flipped_copies.py
@@ -0,0 +1,38 @@
+from PIL import ImageOps, Image
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+class ScriptPostprocessingCreateFlippedCopies(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that adds flipped variants of the image to pp.extra_images."""
+
+    name = "Create flipped copies"
+    order = 4000
+
+    def ui(self):
+        """Build the accordion; returns the enable toggle and flip selection as "enable" and "option"."""
+        with ui_components.InputAccordion(False, label="Create flipped copies") as enable:
+            with gr.Row():
+                option = gr.CheckboxGroup(value=["Horizontal"], choices=["Horizontal", "Vertical", "Both"], show_label=False)
+
+        return dict(enable=enable, option=option)
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, option):
+        """Append one flipped copy of pp.image to pp.extra_images per direction selected in option.
+
+        Copies are appended in the fixed order Horizontal, Vertical, Both; pp.image itself is not modified.
+        """
+        if not enable:
+            return
+
+        # Callables, so only the selected flips are computed (and only then is Image.Transpose looked up).
+        flips = {
+            "Horizontal": lambda image: ImageOps.mirror(image),
+            "Vertical": lambda image: image.transpose(Image.Transpose.FLIP_TOP_BOTTOM),
+            "Both": lambda image: image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).transpose(Image.Transpose.FLIP_LEFT_RIGHT),
+        }
+
+        for direction, flip in flips.items():
+            if direction in option:
+                pp.extra_images.append(flip(pp.image))
diff --git a/scripts/postprocessing_focal_crop.py b/scripts/postprocessing_focal_crop.py
new file mode 100644
index 00000000..d3baf298
--- /dev/null
+++ b/scripts/postprocessing_focal_crop.py
@@ -0,0 +1,63 @@
+from modules import scripts_postprocessing, ui_components, errors
+import gradio as gr
+
+from modules.textual_inversion import autocrop
+
+
+class ScriptPostprocessingFocalCrop(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that crops the image with autocrop.crop_image to the shared target size."""
+
+    name = "Auto focal point crop"
+    order = 4000
+
+    def ui(self):
+        """Build the accordion; returns its controls under the names of the matching process() parameters."""
+        with ui_components.InputAccordion(False, label="Auto focal point crop") as enable:
+            face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_face_weight")
+            entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_entropy_weight")
+            edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_edges_weight")
+            # same "postprocess_focal_crop_" id prefix as the sliders above
+            debug = gr.Checkbox(label='Create debug image', elem_id="postprocess_focal_crop_debug")
+
+        return {
+            "enable": enable,
+            "face_weight": face_weight,
+            "entropy_weight": entropy_weight,
+            "edges_weight": edges_weight,
+            "debug": debug,
+        }
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, face_weight, entropy_weight, edges_weight, debug):
+        """Replace pp.image with the first image returned by autocrop.crop_image.
+
+        Does nothing when the script is disabled or when pp.shared has no target
+        width/height. Any further images returned by autocrop.crop_image are stored in
+        pp.extra_images as copies tagged "focal-crop-debug" with processing disabled.
+        """
+        if not enable:
+            return
+
+        if not pp.shared.target_width or not pp.shared.target_height:
+            return
+
+        # Best effort: on failure the error is reported and dnn_model_path stays None.
+        dnn_model_path = None
+        try:
+            dnn_model_path = autocrop.download_and_cache_models()
+        except Exception:
+            errors.report("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", exc_info=True)
+
+        autocrop_settings = autocrop.Settings(
+            crop_width=pp.shared.target_width,
+            crop_height=pp.shared.target_height,
+            face_points_weight=face_weight,
+            entropy_points_weight=entropy_weight,
+            corner_points_weight=edges_weight,
+            annotate_image=debug,
+            dnn_model_path=dnn_model_path,
+        )
+
+        result, *others = autocrop.crop_image(pp.image, autocrop_settings)
+
+        pp.image = result
+        pp.extra_images = [pp.create_copy(x, nametags=["focal-crop-debug"], disable_processing=True) for x in others]
diff --git a/scripts/postprocessing_gfpgan.py b/scripts/postprocessing_gfpgan.py
index d854f3f7..6e756605 100644
--- a/scripts/postprocessing_gfpgan.py
+++ b/scripts/postprocessing_gfpgan.py
@@ -1,26 +1,25 @@
 from PIL import Image
 import numpy as np
 
-from modules import scripts_postprocessing, gfpgan_model
+from modules import scripts_postprocessing, gfpgan_model, ui_components
 import gradio as gr
 
-from modules.ui_components import FormRow
-
 
 class ScriptPostprocessingGfpGan(scripts_postprocessing.ScriptPostprocessing):
     name = "GFPGAN"
     order = 2000
 
     def ui(self):
-        with FormRow():
-            gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN visibility", value=0, elem_id="extras_gfpgan_visibility")
+        with ui_components.InputAccordion(False, label="GFPGAN") as enable:
+            gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Visibility", value=1.0, elem_id="extras_gfpgan_visibility")
 
         return {
+            "enable": enable,
             "gfpgan_visibility": gfpgan_visibility,
         }
 
-    def process(self, pp: scripts_postprocessing.PostprocessedImage, gfpgan_visibility):
-        if gfpgan_visibility == 0:
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, gfpgan_visibility):
+        if gfpgan_visibility == 0 or not enable:
             return
 
         restored_img = gfpgan_model.gfpgan_fix_faces(np.array(pp.image, dtype=np.uint8))
diff --git a/scripts/postprocessing_split_oversized.py b/scripts/postprocessing_split_oversized.py
new file mode 100644
index 00000000..c4a03160
--- /dev/null
+++ b/scripts/postprocessing_split_oversized.py
@@ -0,0 +1,93 @@
+import math
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+def split_pic(image, inverse_xy, width, height, overlap_ratio):
+    """Yield overlapping width x height tiles cut from image along one axis.
+
+    The image is resized so that the side across the sliding direction matches the
+    target, then the crop window is moved along the other side in equal steps.
+
+    image: PIL-style image (needs .width, .height, .resize() and .crop()).
+    inverse_xy: False slides the window vertically, True horizontally.
+    width, height: size of every yielded tile.
+    overlap_ratio: desired overlap of neighbouring tiles; must be below 1.0.
+    """
+    if inverse_xy:
+        from_w, from_h = image.height, image.width
+        to_w, to_h = height, width
+    else:
+        from_w, from_h = image.width, image.height
+        to_w, to_h = width, height
+    h = from_h * to_w // from_w
+    if inverse_xy:
+        image = image.resize((h, to_w))
+    else:
+        image = image.resize((to_w, h))
+
+    # At least one tile, so callers can always unpack a first result.
+    split_count = max(1, math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio))))
+    # A single tile has no step; this also avoids dividing by zero.
+    y_step = (h - to_h) / (split_count - 1) if split_count > 1 else 0
+    for i in range(split_count):
+        y = int(y_step * i)
+        if inverse_xy:
+            splitted = image.crop((y, 0, y + to_h, to_w))
+        else:
+            splitted = image.crop((0, y, to_w, y + to_h))
+        yield splitted
+
+
+class ScriptPostprocessingSplitOversized(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that cuts images much longer than the target shape into overlapping tiles."""
+
+    name = "Split oversized images"
+    order = 4000
+
+    def ui(self):
+        """Build the accordion; returns its controls under the names of the matching process() parameters."""
+        with ui_components.InputAccordion(False, label="Split oversized images") as enable:
+            with gr.Row():
+                split_threshold = gr.Slider(label='Threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_split_threshold")
+                overlap_ratio = gr.Slider(label='Overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05, elem_id="postprocess_overlap_ratio")
+
+        return {
+            "enable": enable,
+            "split_threshold": split_threshold,
+            "overlap_ratio": overlap_ratio,
+        }
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, split_threshold, overlap_ratio):
+        """Split pp.image into tiles of the shared target size when it is elongated enough.
+
+        Does nothing when the script is disabled, when pp.shared has no target
+        width/height, or when the aspect ratio relative to the target is 1.0 or more
+        or above split_threshold. Otherwise the first tile becomes pp.image and the
+        remaining tiles are stored in pp.extra_images as copies of pp.
+        """
+        if not enable:
+            return
+
+        width = pp.shared.target_width
+        height = pp.shared.target_height
+
+        if not width or not height:
+            return
+
+        if pp.image.height > pp.image.width:
+            ratio = (pp.image.width * height) / (pp.image.height * width)
+            inverse_xy = False
+        else:
+            ratio = (pp.image.height * width) / (pp.image.width * height)
+            inverse_xy = True
+
+        # Split only when ratio < 1.0 and ratio <= split_threshold; skip if either test fails.
+        # (With "and" here the threshold was ignored for every ratio below 1.0.)
+        if ratio >= 1.0 or ratio > split_threshold:
+            return
+
+        result, *others = split_pic(pp.image, inverse_xy, width, height, overlap_ratio)
+
+        pp.image = result
+        pp.extra_images = [pp.create_copy(x) for x in others]
diff --git a/scripts/postprocessing_upscale.py b/scripts/postprocessing_upscale.py
index eb42a29e..ed709688 100644
--- a/scripts/postprocessing_upscale.py
+++ b/scripts/postprocessing_upscale.py
@@ -81,6 +81,14 @@ class ScriptPostprocessingUpscale(scripts_postprocessing.ScriptPostprocessing):
 
         return image
 
+    def process_firstpass(self, pp: scripts_postprocessing.PostprocessedImage, upscale_mode=1, upscale_by=2.0, upscale_to_width=None, upscale_to_height=None, upscale_crop=False, upscaler_1_name=None, upscaler_2_name=None, upscaler_2_visibility=0.0):
+        """Store the target size in pp.shared: the requested size in mode 1, else the image size scaled by upscale_by."""
+        if upscale_mode == 1:
+            target_size = upscale_to_width, upscale_to_height
+        else:
+            target_size = int(pp.image.width * upscale_by), int(pp.image.height * upscale_by)
+        pp.shared.target_width, pp.shared.target_height = target_size
+
     def process(self, pp: scripts_postprocessing.PostprocessedImage, upscale_mode=1, upscale_by=2.0, upscale_to_width=None, upscale_to_height=None, upscale_crop=False, upscaler_1_name=None, upscaler_2_name=None, upscaler_2_visibility=0.0):
         if upscaler_1_name == "None":
             upscaler_1_name = None
@@ -126,6 +134,10 @@ class ScriptPostprocessingUpscaleSimple(ScriptPostprocessingUpscale):
             "upscaler_name": upscaler_name,
         }
 
+    def process_firstpass(self, pp: scripts_postprocessing.PostprocessedImage, upscale_by=2.0, upscaler_name=None):
+        """Store the image size scaled by upscale_by (truncated to int) in pp.shared as the target size."""
+        pp.shared.target_width, pp.shared.target_height = int(pp.image.width * upscale_by), int(pp.image.height * upscale_by)
+
     def process(self, pp: scripts_postprocessing.PostprocessedImage, upscale_by=2.0, upscaler_name=None):
         if upscaler_name is None or upscaler_name == "None":
             return
diff --git a/scripts/processing_autosized_crop.py b/scripts/processing_autosized_crop.py
new file mode 100644
index 00000000..c0980226
--- /dev/null
+++ b/scripts/processing_autosized_crop.py
@@ -0,0 +1,94 @@
+from PIL import Image
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+def center_crop(image: Image.Image, w: int, h: int):
+    """Return image cropped around its center to the aspect ratio w:h and resized to w x h (Lanczos)."""
+    iw, ih = image.size
+    if ih / h < iw / w:
+        # relatively wider than the target: trim left and right equally
+        sw = w * ih / h
+        box = (iw - sw) / 2, 0, iw - (iw - sw) / 2, ih
+    else:
+        # otherwise trim top and bottom equally
+        sh = h * iw / w
+        box = 0, (ih - sh) / 2, iw, ih - (ih - sh) / 2
+    return image.resize((w, h), Image.Resampling.LANCZOS, box)
+
+
+def multicrop_pic(image: Image.Image, mindim, maxdim, minarea, maxarea, objective, threshold):
+    """Center-crop image to the best admissible size, or return None when there is none.
+
+    Candidate widths and heights run from mindim to maxdim in steps of 64. A candidate
+    is admissible when its area lies within [minarea, maxarea] and its aspect-ratio
+    error against the image does not exceed threshold. With objective
+    'Maximize area' the largest area wins (ties: smaller error); with any other
+    objective the smallest error wins (ties: larger area).
+    """
+    iw, ih = image.size
+
+    def err(w, h):
+        # 0 when both aspect ratios match, approaching 1 the more they differ
+        x = iw / ih / (w / h)
+        return 1 - (x if x < 1 else 1 / x)
+
+    def rank(wh):
+        area, neg_err = wh[0] * wh[1], -err(*wh)
+        return (area, neg_err) if objective == 'Maximize area' else (neg_err, area)
+
+    candidates = (
+        (w, h)
+        for w in range(mindim, maxdim + 1, 64)
+        for h in range(mindim, maxdim + 1, 64)
+        if minarea <= w * h <= maxarea and err(w, h) <= threshold
+    )
+    wh = max(candidates, key=rank, default=None)
+    return wh and center_crop(image, *wh)
+
+
+class ScriptPostprocessingAutosizedCrop(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that center-crops the image to an automatically chosen size."""
+
+    name = "Auto-sized crop"
+    order = 4000
+
+    def ui(self):
+        """Build the accordion; returns its controls under the names of the matching process() parameters."""
+        with ui_components.InputAccordion(False, label="Auto-sized crop") as enable:
+            gr.Markdown('Each image is center-cropped with an automatically chosen width and height.')
+            with gr.Row():
+                mindim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension lower bound", value=384, elem_id="postprocess_multicrop_mindim")
+                maxdim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension upper bound", value=768, elem_id="postprocess_multicrop_maxdim")
+            with gr.Row():
+                minarea = gr.Slider(minimum=64 * 64, maximum=2048 * 2048, step=1, label="Area lower bound", value=64 * 64, elem_id="postprocess_multicrop_minarea")
+                maxarea = gr.Slider(minimum=64 * 64, maximum=2048 * 2048, step=1, label="Area upper bound", value=640 * 640, elem_id="postprocess_multicrop_maxarea")
+            with gr.Row():
+                objective = gr.Radio(["Maximize area", "Minimize error"], value="Maximize area", label="Resizing objective", elem_id="postprocess_multicrop_objective")
+                threshold = gr.Slider(minimum=0, maximum=1, step=0.01, label="Error threshold", value=0.1, elem_id="postprocess_multicrop_threshold")
+
+        return {
+            "enable": enable,
+            "mindim": mindim,
+            "maxdim": maxdim,
+            "minarea": minarea,
+            "maxarea": maxarea,
+            "objective": objective,
+            "threshold": threshold,
+        }
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, mindim, maxdim, minarea, maxarea, objective, threshold):
+        """Replace pp.image with the result of multicrop_pic when a suitable size exists.
+
+        Does nothing when the script is disabled. When no candidate size satisfies the
+        bounds and the error threshold, the image is left unchanged and a note is printed.
+        """
+        if not enable:
+            return
+
+        cropped = multicrop_pic(pp.image, mindim, maxdim, minarea, maxarea, objective, threshold)
+        if cropped is not None:
+            pp.image = cropped
+        else:
+            print(f"skipped {pp.image.width}x{pp.image.height} image (can't find suitable size within error threshold)")
-- 
cgit v1.2.3