diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/api/api.py | 107 | ||||
-rw-r--r-- | modules/api/models.py | 63 | ||||
-rw-r--r-- | modules/extras.py | 178 | ||||
-rw-r--r-- | modules/generation_parameters_copypaste.py | 100 | ||||
-rw-r--r-- | modules/hypernetworks/hypernetwork.py | 62 | ||||
-rw-r--r-- | modules/hypernetworks/ui.py | 5 | ||||
-rw-r--r-- | modules/images.py | 64 | ||||
-rw-r--r-- | modules/img2img.py | 10 | ||||
-rw-r--r-- | modules/processing.py | 150 | ||||
-rw-r--r-- | modules/script_callbacks.py | 50 | ||||
-rw-r--r-- | modules/sd_models.py | 7 | ||||
-rw-r--r-- | modules/shared.py | 12 | ||||
-rw-r--r-- | modules/textual_inversion/autocrop.py | 341 | ||||
-rw-r--r-- | modules/textual_inversion/dataset.py | 4 | ||||
-rw-r--r-- | modules/textual_inversion/learn_schedule.py | 2 | ||||
-rw-r--r-- | modules/textual_inversion/preprocess.py | 38 | ||||
-rw-r--r-- | modules/textual_inversion/textual_inversion.py | 42 | ||||
-rw-r--r-- | modules/ui.py | 371 |
18 files changed, 1108 insertions, 498 deletions
diff --git a/modules/api/api.py b/modules/api/api.py index a860a964..49c213ea 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -1,45 +1,37 @@ -from modules.api.models import StableDiffusionTxt2ImgProcessingAPI, StableDiffusionImg2ImgProcessingAPI +import uvicorn +from gradio.processing_utils import encode_pil_to_base64, decode_base64_to_file, decode_base64_to_image +from fastapi import APIRouter, HTTPException +import modules.shared as shared +from modules.api.models import * from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images from modules.sd_samplers import all_samplers -from modules.extras import run_pnginfo -import modules.shared as shared -import uvicorn -from fastapi import Body, APIRouter, HTTPException -from fastapi.responses import JSONResponse -from pydantic import BaseModel, Field, Json -import json -import io -import base64 -from PIL import Image - -sampler_to_index = lambda name: next(filter(lambda row: name.lower() == row[1].name.lower(), enumerate(all_samplers)), None) +from modules.extras import run_extras -class TextToImageResponse(BaseModel): - images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.") - parameters: Json - info: Json +def upscaler_to_index(name: str): + try: + return [x.name.lower() for x in shared.sd_upscalers].index(name.lower()) + except: + raise HTTPException(status_code=400, detail=f"Invalid upscaler, needs to be on of these: {' , '.join([x.name for x in sd_upscalers])}") -class ImageToImageResponse(BaseModel): - images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.") - parameters: Json - info: Json +sampler_to_index = lambda name: next(filter(lambda row: name.lower() == row[1].name.lower(), enumerate(all_samplers)), None) +def setUpscalers(req: dict): + reqDict = vars(req) + reqDict['extras_upscaler_1'] = upscaler_to_index(req.upscaler_1) + 
reqDict['extras_upscaler_2'] = upscaler_to_index(req.upscaler_2) + reqDict.pop('upscaler_1') + reqDict.pop('upscaler_2') + return reqDict class Api: def __init__(self, app, queue_lock): self.router = APIRouter() self.app = app self.queue_lock = queue_lock - self.app.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"]) - self.app.add_api_route("/sdapi/v1/img2img", self.img2imgapi, methods=["POST"]) - - def __base64_to_image(self, base64_string): - # if has a comma, deal with prefix - if "," in base64_string: - base64_string = base64_string.split(",")[1] - imgdata = base64.b64decode(base64_string) - # convert base64 to PIL image - return Image.open(io.BytesIO(imgdata)) + self.app.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"], response_model=TextToImageResponse) + self.app.add_api_route("/sdapi/v1/img2img", self.img2imgapi, methods=["POST"], response_model=ImageToImageResponse) + self.app.add_api_route("/sdapi/v1/extra-single-image", self.extras_single_image_api, methods=["POST"], response_model=ExtrasSingleImageResponse) + self.app.add_api_route("/sdapi/v1/extra-batch-images", self.extras_batch_images_api, methods=["POST"], response_model=ExtrasBatchImagesResponse) def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI): sampler_index = sampler_to_index(txt2imgreq.sampler_index) @@ -59,15 +51,9 @@ class Api: with self.queue_lock: processed = process_images(p) - b64images = [] - for i in processed.images: - buffer = io.BytesIO() - i.save(buffer, format="png") - b64images.append(base64.b64encode(buffer.getvalue())) - - return TextToImageResponse(images=b64images, parameters=json.dumps(vars(txt2imgreq)), info=json.dumps(processed.info)) - + b64images = list(map(encode_pil_to_base64, processed.images)) + return TextToImageResponse(images=b64images, parameters=vars(txt2imgreq), info=processed.js()) def img2imgapi(self, img2imgreq: StableDiffusionImg2ImgProcessingAPI): sampler_index = 
sampler_to_index(img2imgreq.sampler_index) @@ -82,7 +68,7 @@ class Api: mask = img2imgreq.mask if mask: - mask = self.__base64_to_image(mask) + mask = decode_base64_to_image(mask) populate = img2imgreq.copy(update={ # Override __init__ params @@ -97,7 +83,7 @@ class Api: imgs = [] for img in init_images: - img = self.__base64_to_image(img) + img = decode_base64_to_image(img) imgs = [img] * p.batch_size p.init_images = imgs @@ -105,17 +91,40 @@ class Api: with self.queue_lock: processed = process_images(p) - b64images = [] - for i in processed.images: - buffer = io.BytesIO() - i.save(buffer, format="png") - b64images.append(base64.b64encode(buffer.getvalue())) + b64images = list(map(encode_pil_to_base64, processed.images)) - return ImageToImageResponse(images=b64images, parameters=json.dumps(vars(img2imgreq)), info=json.dumps(processed.info)) + if (not img2imgreq.include_init_images): + img2imgreq.init_images = None + img2imgreq.mask = None + + return ImageToImageResponse(images=b64images, parameters=vars(img2imgreq), info=processed.js()) - def extrasapi(self): - raise NotImplementedError + def extras_single_image_api(self, req: ExtrasSingleImageRequest): + reqDict = setUpscalers(req) + + reqDict['image'] = decode_base64_to_image(reqDict['image']) + + with self.queue_lock: + result = run_extras(extras_mode=0, image_folder="", input_dir="", output_dir="", **reqDict) + + return ExtrasSingleImageResponse(image=encode_pil_to_base64(result[0][0]), html_info=result[1]) + + def extras_batch_images_api(self, req: ExtrasBatchImagesRequest): + reqDict = setUpscalers(req) + + def prepareFiles(file): + file = decode_base64_to_file(file.data, file_path=file.name) + file.orig_name = file.name + return file + + reqDict['image_folder'] = list(map(prepareFiles, reqDict['imageList'])) + reqDict.pop('imageList') + + with self.queue_lock: + result = run_extras(extras_mode=1, image="", input_dir="", output_dir="", **reqDict) + return 
ExtrasBatchImagesResponse(images=list(map(encode_pil_to_base64, result[0])), html_info=result[1]) + def pnginfoapi(self): raise NotImplementedError diff --git a/modules/api/models.py b/modules/api/models.py index f551fa35..dd122321 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -1,10 +1,10 @@ -from array import array -from inflection import underscore -from typing import Any, Dict, Optional +import inspect from pydantic import BaseModel, Field, create_model +from typing import Any, Optional +from typing_extensions import Literal +from inflection import underscore from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img -import inspect - +from modules.shared import sd_upscalers API_NOT_ALLOWED = [ "self", @@ -31,6 +31,7 @@ class ModelDef(BaseModel): field_alias: str field_type: Any field_value: Any + field_exclude: bool = False class PydanticModelGenerator: @@ -78,7 +79,8 @@ class PydanticModelGenerator: field=underscore(fields["key"]), field_alias=fields["key"], field_type=fields["type"], - field_value=fields["default"])) + field_value=fields["default"], + field_exclude=fields["exclude"] if "exclude" in fields else False)) def generate_model(self): """ @@ -86,7 +88,7 @@ class PydanticModelGenerator: from the json and overrides provided at initialization """ fields = { - d.field: (d.field_type, Field(default=d.field_value, alias=d.field_alias)) for d in self._model_def + d.field: (d.field_type, Field(default=d.field_value, alias=d.field_alias, exclude=d.field_exclude)) for d in self._model_def } DynamicModel = create_model(self._model_name, **fields) DynamicModel.__config__.allow_population_by_field_name = True @@ -102,5 +104,48 @@ StableDiffusionTxt2ImgProcessingAPI = PydanticModelGenerator( StableDiffusionImg2ImgProcessingAPI = PydanticModelGenerator( "StableDiffusionProcessingImg2Img", StableDiffusionProcessingImg2Img, - [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "init_images", 
"type": list, "default": None}, {"key": "denoising_strength", "type": float, "default": 0.75}, {"key": "mask", "type": str, "default": None}] -).generate_model()
\ No newline at end of file + [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "init_images", "type": list, "default": None}, {"key": "denoising_strength", "type": float, "default": 0.75}, {"key": "mask", "type": str, "default": None}, {"key": "include_init_images", "type": bool, "default": False, "exclude" : True}] +).generate_model() + +class TextToImageResponse(BaseModel): + images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.") + parameters: dict + info: str + +class ImageToImageResponse(BaseModel): + images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.") + parameters: dict + info: str + +class ExtrasBaseRequest(BaseModel): + resize_mode: Literal[0, 1] = Field(default=0, title="Resize Mode", description="Sets the resize mode: 0 to upscale by upscaling_resize amount, 1 to upscale up to upscaling_resize_h x upscaling_resize_w.") + show_extras_results: bool = Field(default=True, title="Show results", description="Should the backend return the generated image?") + gfpgan_visibility: float = Field(default=0, title="GFPGAN Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of GFPGAN, values should be between 0 and 1.") + codeformer_visibility: float = Field(default=0, title="CodeFormer Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of CodeFormer, values should be between 0 and 1.") + codeformer_weight: float = Field(default=0, title="CodeFormer Weight", ge=0, le=1, allow_inf_nan=False, description="Sets the weight of CodeFormer, values should be between 0 and 1.") + upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=4, description="By how much to upscale the image, only used when resize_mode=0.") + upscaling_resize_w: int = Field(default=512, title="Target Width", ge=1, description="Target width for the upscaler to hit. 
Only used when resize_mode=1.") + upscaling_resize_h: int = Field(default=512, title="Target Height", ge=1, description="Target height for the upscaler to hit. Only used when resize_mode=1.") + upscaling_crop: bool = Field(default=True, title="Crop to fit", description="Should the upscaler crop the image to fit in the choosen size?") + upscaler_1: str = Field(default="None", title="Main upscaler", description=f"The name of the main upscaler to use, it has to be one of this list: {' , '.join([x.name for x in sd_upscalers])}") + upscaler_2: str = Field(default="None", title="Secondary upscaler", description=f"The name of the secondary upscaler to use, it has to be one of this list: {' , '.join([x.name for x in sd_upscalers])}") + extras_upscaler_2_visibility: float = Field(default=0, title="Secondary upscaler visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of secondary upscaler, values should be between 0 and 1.") + +class ExtraBaseResponse(BaseModel): + html_info: str = Field(title="HTML info", description="A series of HTML tags containing the process info.") + +class ExtrasSingleImageRequest(ExtrasBaseRequest): + image: str = Field(default="", title="Image", description="Image to work on, must be a Base64 string containing the image's data.") + +class ExtrasSingleImageResponse(ExtraBaseResponse): + image: str = Field(default=None, title="Image", description="The generated image in base64 format.") + +class FileData(BaseModel): + data: str = Field(title="File data", description="Base64 representation of the file") + name: str = Field(title="File name") + +class ExtrasBatchImagesRequest(ExtrasBaseRequest): + imageList: list[FileData] = Field(title="Images", description="List of images to work on. Must be Base64 strings") + +class ExtrasBatchImagesResponse(ExtraBaseResponse): + images: list[str] = Field(title="Images", description="The generated images in base64 format.")
\ No newline at end of file diff --git a/modules/extras.py b/modules/extras.py index 22c5a1c1..4d51088b 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -1,3 +1,4 @@ +from __future__ import annotations
import math
import os
@@ -7,6 +8,10 @@ from PIL import Image import torch
import tqdm
+from typing import Callable, List, OrderedDict, Tuple
+from functools import partial
+from dataclasses import dataclass
+
from modules import processing, shared, images, devices, sd_models
from modules.shared import opts
import modules.gfpgan_model
@@ -17,10 +22,38 @@ import piexif.helper import gradio as gr
-cached_images = {}
+class LruCache(OrderedDict):
+ @dataclass(frozen=True)
+ class Key:
+ image_hash: int
+ info_hash: int
+ args_hash: int
+
+ @dataclass
+ class Value:
+ image: Image.Image
+ info: str
+
+ def __init__(self, max_size: int = 5, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._max_size = max_size
+
+ def get(self, key: LruCache.Key) -> LruCache.Value:
+ ret = super().get(key)
+ if ret is not None:
+ self.move_to_end(key) # Move to end of eviction list
+ return ret
+
+ def put(self, key: LruCache.Key, value: LruCache.Value) -> None:
+ self[key] = value
+ while len(self) > self._max_size:
+ self.popitem(last=False)
+
+cached_images: LruCache = LruCache(max_size=5)
-def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_dir, show_extras_results, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility):
+
+def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_dir, show_extras_results, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, upscale_first: bool):
devices.torch_gc()
imageArr = []
@@ -39,7 +72,7 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_ if input_dir == '':
return outputs, "Please select an input directory.", ''
- image_list = [file for file in [os.path.join(input_dir, x) for x in sorted(os.listdir(input_dir))] if os.path.isfile(file)]
+ image_list = shared.listfiles(input_dir)
for img in image_list:
try:
image = Image.open(img)
@@ -56,72 +89,102 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_ else:
outpath = opts.outdir_samples or opts.outdir_extras_samples
-
- for image, image_name in zip(imageArr, imageNameArr):
- if image is None:
- return outputs, "Please select an input image.", ''
- existing_pnginfo = image.info or {}
+ # Extra operation definitions
- image = image.convert("RGB")
- info = ""
+ def run_gfpgan(image: Image.Image, info: str) -> Tuple[Image.Image, str]:
+ restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(image, dtype=np.uint8))
+ res = Image.fromarray(restored_img)
- if gfpgan_visibility > 0:
- restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(image, dtype=np.uint8))
- res = Image.fromarray(restored_img)
+ if gfpgan_visibility < 1.0:
+ res = Image.blend(image, res, gfpgan_visibility)
- if gfpgan_visibility < 1.0:
- res = Image.blend(image, res, gfpgan_visibility)
+ info += f"GFPGAN visibility:{round(gfpgan_visibility, 2)}\n"
+ return (res, info)
- info += f"GFPGAN visibility:{round(gfpgan_visibility, 2)}\n"
- image = res
+ def run_codeformer(image: Image.Image, info: str) -> Tuple[Image.Image, str]:
+ restored_img = modules.codeformer_model.codeformer.restore(np.array(image, dtype=np.uint8), w=codeformer_weight)
+ res = Image.fromarray(restored_img)
- if codeformer_visibility > 0:
- restored_img = modules.codeformer_model.codeformer.restore(np.array(image, dtype=np.uint8), w=codeformer_weight)
- res = Image.fromarray(restored_img)
+ if codeformer_visibility < 1.0:
+ res = Image.blend(image, res, codeformer_visibility)
- if codeformer_visibility < 1.0:
- res = Image.blend(image, res, codeformer_visibility)
+ info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n"
+ return (res, info)
- info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n"
- image = res
+ def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop):
+ upscaler = shared.sd_upscalers[scaler_index]
+ res = upscaler.scaler.upscale(image, resize, upscaler.data_path)
+ if mode == 1 and crop:
+ cropped = Image.new("RGB", (resize_w, resize_h))
+ cropped.paste(res, box=(resize_w // 2 - res.width // 2, resize_h // 2 - res.height // 2))
+ res = cropped
+ return res
+ def run_prepare_crop(image: Image.Image, info: str) -> Tuple[Image.Image, str]:
+ # Actual crop happens in run_upscalers_blend, this just sets upscaling_resize and adds info text
+ nonlocal upscaling_resize
if resize_mode == 1:
upscaling_resize = max(upscaling_resize_w/image.width, upscaling_resize_h/image.height)
crop_info = " (crop)" if upscaling_crop else ""
info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n"
+ return (image, info)
+
+ @dataclass
+ class UpscaleParams:
+ upscaler_idx: int
+ blend_alpha: float
+
+ def run_upscalers_blend(params: List[UpscaleParams], image: Image.Image, info: str) -> Tuple[Image.Image, str]:
+ blended_result: Image.Image = None
+ for upscaler in params:
+ upscale_args = (upscaler.upscaler_idx, upscaling_resize, resize_mode,
+ upscaling_resize_w, upscaling_resize_h, upscaling_crop)
+ cache_key = LruCache.Key(image_hash=hash(np.array(image.getdata()).tobytes()),
+ info_hash=hash(info),
+ args_hash=hash(upscale_args))
+ cached_entry = cached_images.get(cache_key)
+ if cached_entry is None:
+ res = upscale(image, *upscale_args)
+ info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {upscaler.blend_alpha}, model:{shared.sd_upscalers[upscaler.upscaler_idx].name}\n"
+ cached_images.put(cache_key, LruCache.Value(image=res, info=info))
+ else:
+ res, info = cached_entry.image, cached_entry.info
+
+ if blended_result is None:
+ blended_result = res
+ else:
+ blended_result = Image.blend(blended_result, res, upscaler.blend_alpha)
+ return (blended_result, info)
+
+ # Build a list of operations to run
+ facefix_ops: List[Callable] = []
+ facefix_ops += [run_gfpgan] if gfpgan_visibility > 0 else []
+ facefix_ops += [run_codeformer] if codeformer_visibility > 0 else []
+
+ upscale_ops: List[Callable] = []
+ upscale_ops += [run_prepare_crop] if resize_mode == 1 else []
+
+ if upscaling_resize != 0:
+ step_params: List[UpscaleParams] = []
+ step_params.append(UpscaleParams(upscaler_idx=extras_upscaler_1, blend_alpha=1.0))
+ if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0:
+ step_params.append(UpscaleParams(upscaler_idx=extras_upscaler_2, blend_alpha=extras_upscaler_2_visibility))
+
+ upscale_ops.append(partial(run_upscalers_blend, step_params))
+
+ extras_ops: List[Callable] = (upscale_ops + facefix_ops) if upscale_first else (facefix_ops + upscale_ops)
+
+ for image, image_name in zip(imageArr, imageNameArr):
+ if image is None:
+ return outputs, "Please select an input image.", ''
+ existing_pnginfo = image.info or {}
+
+ image = image.convert("RGB")
+ info = ""
+ # Run each operation on each image
+ for op in extras_ops:
+ image, info = op(image, info)
- if upscaling_resize != 1.0:
- def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop):
- small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10))
- pixels = tuple(np.array(small).flatten().tolist())
- key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight,
- resize_mode, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop) + pixels
-
- c = cached_images.get(key)
- if c is None:
- upscaler = shared.sd_upscalers[scaler_index]
- c = upscaler.scaler.upscale(image, resize, upscaler.data_path)
- if mode == 1 and crop:
- cropped = Image.new("RGB", (resize_w, resize_h))
- cropped.paste(c, box=(resize_w // 2 - c.width // 2, resize_h // 2 - c.height // 2))
- c = cropped
- cached_images[key] = c
-
- return c
-
- info += f"Upscale: {round(upscaling_resize, 3)}, model:{shared.sd_upscalers[extras_upscaler_1].name}\n"
- res = upscale(image, extras_upscaler_1, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop)
-
- if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0:
- res2 = upscale(image, extras_upscaler_2, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop)
- info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {round(extras_upscaler_2_visibility, 3)}, model:{shared.sd_upscalers[extras_upscaler_2].name}\n"
- res = Image.blend(res, res2, extras_upscaler_2_visibility)
-
- image = res
-
- while len(cached_images) > 2:
- del cached_images[next(iter(cached_images.keys()))]
-
if opts.use_original_name_batch and image_name != None:
basename = os.path.splitext(os.path.basename(image_name))[0]
else:
@@ -141,6 +204,9 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_ return outputs, plaintext_to_html(info), ''
+def clear_cache():
+ cached_images.clear()
+
def run_pnginfo(image):
if image is None:
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py index f73647da..d590e9ee 100644 --- a/modules/generation_parameters_copypaste.py +++ b/modules/generation_parameters_copypaste.py @@ -1,14 +1,20 @@ +import base64
+import io
import os
import re
import gradio as gr
from modules.shared import script_path
from modules import shared
+import tempfile
+from PIL import Image, PngImagePlugin
re_param_code = r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)'
re_param = re.compile(re_param_code)
re_params = re.compile(r"^(?:" + re_param_code + "){3,}$")
re_imagesize = re.compile(r"^(\d+)x(\d+)$")
type_of_gr_update = type(gr.update())
+paste_fields = {}
+bind_list = []
def quote(text):
@@ -20,6 +26,94 @@ def quote(text): text = text.replace('"', '\\"')
return f'"{text}"'
+
+def image_from_url_text(filedata):
+ if type(filedata) == dict and filedata["is_file"]:
+ filename = filedata["name"]
+ tempdir = os.path.normpath(tempfile.gettempdir())
+ normfn = os.path.normpath(filename)
+ assert normfn.startswith(tempdir), 'trying to open image file not in temporary directory'
+
+ return Image.open(filename)
+
+ if type(filedata) == list:
+ if len(filedata) == 0:
+ return None
+
+ filedata = filedata[0]
+
+ if filedata.startswith("data:image/png;base64,"):
+ filedata = filedata[len("data:image/png;base64,"):]
+
+ filedata = base64.decodebytes(filedata.encode('utf-8'))
+ image = Image.open(io.BytesIO(filedata))
+ return image
+
+
+def add_paste_fields(tabname, init_img, fields):
+ paste_fields[tabname] = {"init_img": init_img, "fields": fields}
+
+ # backwards compatibility for existing extensions
+ import modules.ui
+ if tabname == 'txt2img':
+ modules.ui.txt2img_paste_fields = fields
+ elif tabname == 'img2img':
+ modules.ui.img2img_paste_fields = fields
+
+
+def create_buttons(tabs_list):
+ buttons = {}
+ for tab in tabs_list:
+ buttons[tab] = gr.Button(f"Send to {tab}")
+ return buttons
+
+
+# If send_generate_info is a tab name, the generation info is copied from that tab's paste fields; otherwise it is the input component passed to connect_paste.
+def bind_buttons(buttons, send_image, send_generate_info):
+ bind_list.append([buttons, send_image, send_generate_info])
+
+
+def run_bind():
+ for buttons, send_image, send_generate_info in bind_list:
+ for tab in buttons:
+ button = buttons[tab]
+ if send_image and paste_fields[tab]["init_img"]:
+ if type(send_image) == gr.Gallery:
+ button.click(
+ fn=lambda x: image_from_url_text(x),
+ _js="extract_image_from_gallery",
+ inputs=[send_image],
+ outputs=[paste_fields[tab]["init_img"]],
+ )
+ else:
+ button.click(
+ fn=lambda x:x,
+ inputs=[send_image],
+ outputs=[paste_fields[tab]["init_img"]],
+ )
+
+ if send_generate_info and paste_fields[tab]["fields"] is not None:
+ paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration', 'Size-1', 'Size-2']
+ if shared.opts.send_seed:
+ paste_field_names += ["Seed"]
+ if send_generate_info in paste_fields:
+ button.click(
+ fn=lambda *x:x,
+ inputs=[field for field,name in paste_fields[send_generate_info]["fields"] if name in paste_field_names],
+ outputs=[field for field,name in paste_fields[tab]["fields"] if name in paste_field_names],
+ )
+
+ else:
+ connect_paste(button, [(field, name) for field, name in paste_fields[tab]["fields"] if name in paste_field_names], send_generate_info)
+
+ button.click(
+ fn=None,
+ _js=f"switch_to_{tab}",
+ inputs=None,
+ outputs=None,
+ )
+
+
def parse_generation_parameters(x: str):
"""parses generation parameters string, the one you see in text field under the picture in UI:
```
@@ -68,7 +162,7 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model return res
-def connect_paste(button, paste_fields, input_comp, js=None):
+def connect_paste(button, paste_fields, input_comp, jsfunc=None):
def paste_func(prompt):
if not prompt and not shared.cmd_opts.hide_ui_dir_config:
filename = os.path.join(script_path, "params.txt")
@@ -106,7 +200,9 @@ def connect_paste(button, paste_fields, input_comp, js=None): button.click(
fn=paste_func,
- _js=js,
+ _js=jsfunc,
inputs=[input_comp],
outputs=[x[0] for x in paste_fields],
)
+
+
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index d647ea55..2e84583b 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -5,6 +5,7 @@ import html import os
import sys
import traceback
+import inspect
import modules.textual_inversion.dataset
import torch
@@ -15,20 +16,26 @@ from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion
from modules.textual_inversion.learn_schedule import LearnRateScheduler
from torch import einsum
+from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_
from collections import defaultdict, deque
from statistics import stdev, mean
+
class HypernetworkModule(torch.nn.Module):
multiplier = 1.0
activation_dict = {
+ "linear": torch.nn.Identity,
"relu": torch.nn.ReLU,
"leakyrelu": torch.nn.LeakyReLU,
"elu": torch.nn.ELU,
"swish": torch.nn.Hardswish,
+ "tanh": torch.nn.Tanh,
+ "sigmoid": torch.nn.Sigmoid,
}
+ activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
- def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False):
+ def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
@@ -65,9 +72,24 @@ class HypernetworkModule(torch.nn.Module): else:
for layer in self.linear:
if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm:
- layer.weight.data.normal_(mean=0.0, std=0.01)
- layer.bias.data.zero_()
-
+ w, b = layer.weight.data, layer.bias.data
+ if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm:
+ normal_(w, mean=0.0, std=0.01)
+ normal_(b, mean=0.0, std=0.005)
+ elif weight_init == 'XavierUniform':
+ xavier_uniform_(w)
+ zeros_(b)
+ elif weight_init == 'XavierNormal':
+ xavier_normal_(w)
+ zeros_(b)
+ elif weight_init == 'KaimingUniform':
+ kaiming_uniform_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu')
+ zeros_(b)
+ elif weight_init == 'KaimingNormal':
+ kaiming_normal_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu')
+ zeros_(b)
+ else:
+ raise KeyError(f"Key {weight_init} is not defined as initialization!")
self.to(devices.device)
def fix_old_state_dict(self, state_dict):
@@ -105,7 +127,7 @@ class Hypernetwork: filename = None
name = None
- def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False):
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
self.filename = None
self.name = name
self.layers = {}
@@ -114,13 +136,14 @@ class Hypernetwork: self.sd_checkpoint_name = None
self.layer_structure = layer_structure
self.activation_func = activation_func
+ self.weight_init = weight_init
self.add_layer_norm = add_layer_norm
self.use_dropout = use_dropout
for size in enable_sizes or []:
self.layers[size] = (
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
)
def weights(self):
@@ -144,6 +167,7 @@ class Hypernetwork: state_dict['layer_structure'] = self.layer_structure
state_dict['activation_func'] = self.activation_func
state_dict['is_layer_norm'] = self.add_layer_norm
+ state_dict['weight_initialization'] = self.weight_init
state_dict['use_dropout'] = self.use_dropout
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
@@ -158,15 +182,21 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu')
self.layer_structure = state_dict.get('layer_structure', [1, 2, 1])
+ print(self.layer_structure)
self.activation_func = state_dict.get('activation_func', None)
+ print(f"Activation function is {self.activation_func}")
+ self.weight_init = state_dict.get('weight_initialization', 'Normal')
+ print(f"Weight initialization is {self.weight_init}")
self.add_layer_norm = state_dict.get('is_layer_norm', False)
+ print(f"Layer norm is set to {self.add_layer_norm}")
self.use_dropout = state_dict.get('use_dropout', False)
+ print(f"Dropout usage is set to {self.use_dropout}" )
for size, sd in state_dict.items():
if type(size) == int:
self.layers[size] = (
- HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
- HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
+ HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout),
)
self.name = state_dict.get('name', self.name)
@@ -399,7 +429,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer.step()
- if torch.isnan(losses[hypernetwork.step % losses.shape[0]]):
+ steps_done = hypernetwork.step + 1
+
+ if torch.isnan(losses[hypernetwork.step % losses.shape[0]]):
raise RuntimeError("Loss diverged.")
if len(previous_mean_losses) > 1:
@@ -409,9 +441,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})"
pbar.set_description(dataset_loss_info)
- if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
+ if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0:
# Before saving, change name to match current checkpoint.
- hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}'
+ hypernetwork.name = f'{hypernetwork_name}-{steps_done}'
last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt')
hypernetwork.save(last_saved_file)
@@ -420,8 +452,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log "learn_rate": scheduler.learn_rate
})
- if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0:
- forced_filename = f'{hypernetwork_name}-{hypernetwork.step}'
+ if images_dir is not None and steps_done % create_image_every == 0:
+ forced_filename = f'{hypernetwork_name}-{steps_done}'
last_saved_image = os.path.join(images_dir, forced_filename)
optimizer.zero_grad()
@@ -458,7 +490,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if image is not None:
shared.state.current_image = image
- last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename)
+ last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
last_saved_image += f", prompt: {preview_text}"
shared.state.job_no = hypernetwork.step
diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 2b472d87..aad09ffc 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -8,8 +8,10 @@ import modules.textual_inversion.textual_inversion from modules import devices, sd_hijack, shared
from modules.hypernetworks import hypernetwork
+not_available = ["hardswish", "multiheadattention"]
+keys = ["linear"] + list(x for x in hypernetwork.HypernetworkModule.activation_dict.keys() if x not in not_available)
-def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False):
+def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
# Remove illegal characters from name.
name = "".join( x for x in name if (x.isalnum() or x in "._- "))
@@ -25,6 +27,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, enable_sizes=[int(x) for x in enable_sizes],
layer_structure=layer_structure,
activation_func=activation_func,
+ weight_init=weight_init,
add_layer_norm=add_layer_norm,
use_dropout=use_dropout,
)
diff --git a/modules/images.py b/modules/images.py index 286de2ae..a0728553 100644 --- a/modules/images.py +++ b/modules/images.py @@ -277,7 +277,7 @@ invalid_filename_chars = '<>:"/\\|?*\n' invalid_filename_prefix = ' '
invalid_filename_postfix = ' .'
re_nonletters = re.compile(r'[\s' + string.punctuation + ']+')
-re_pattern = re.compile(r"([^\[\]]+|\[([^]]+)]|[\[\]]*)")
+re_pattern = re.compile(r"(.*?)(?:\[([^\[\]]+)\]|$)")
re_pattern_arg = re.compile(r"(.*)<([^>]*)>$")
max_filename_part_length = 128
@@ -300,8 +300,8 @@ class FilenameGenerator: 'seed': lambda self: self.seed if self.seed is not None else '',
'steps': lambda self: self.p and self.p.steps,
'cfg': lambda self: self.p and self.p.cfg_scale,
- 'width': lambda self: self.p and self.p.width,
- 'height': lambda self: self.p and self.p.height,
+ 'width': lambda self: self.image.width,
+ 'height': lambda self: self.image.height,
'styles': lambda self: self.p and sanitize_filename_part(", ".join([style for style in self.p.styles if not style == "None"]) or "None", replace_spaces=False),
'sampler': lambda self: self.p and sanitize_filename_part(sd_samplers.samplers[self.p.sampler_index].name, replace_spaces=False),
'model_hash': lambda self: getattr(self.p, "sd_model_hash", shared.sd_model.sd_model_hash),
@@ -315,10 +315,11 @@ class FilenameGenerator: }
default_time_format = '%Y%m%d%H%M%S'
- def __init__(self, p, seed, prompt):
+ def __init__(self, p, seed, prompt, image):
self.p = p
self.seed = seed
self.prompt = prompt
+ self.image = image
def prompt_no_style(self):
if self.p is None or self.prompt is None:
@@ -343,7 +344,7 @@ class FilenameGenerator: def datetime(self, *args):
time_datetime = datetime.datetime.now()
- time_format = args[0] if len(args) > 0 else self.default_time_format
+ time_format = args[0] if len(args) > 0 and args[0] != "" else self.default_time_format
try:
time_zone = pytz.timezone(args[1]) if len(args) > 1 else None
except pytz.exceptions.UnknownTimeZoneError as _:
@@ -362,9 +363,9 @@ class FilenameGenerator: for m in re_pattern.finditer(x):
text, pattern = m.groups()
+ res += text
if pattern is None:
- res += text
continue
pattern_args = []
@@ -385,12 +386,9 @@ class FilenameGenerator: print(f"Error adding [{pattern}] to filename", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
- if replacement is None:
- res += f'[{pattern}]'
- else:
+ if replacement is not None:
res += str(replacement)
-
- continue
+ continue
res += f'[{pattern}]'
@@ -452,18 +450,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i txt_fullfn (`str` or None):
If a text file is saved for this image, this will be its full path. Otherwise None.
"""
- namegen = FilenameGenerator(p, seed, prompt)
-
- if extension == 'png' and opts.enable_pnginfo and info is not None:
- pnginfo = PngImagePlugin.PngInfo()
-
- if existing_info is not None:
- for k, v in existing_info.items():
- pnginfo.add_text(k, str(v))
-
- pnginfo.add_text(pnginfo_section_name, info)
- else:
- pnginfo = None
+ namegen = FilenameGenerator(p, seed, prompt, image)
if save_to_dirs is None:
save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt)
@@ -492,19 +479,27 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i if add_number:
basecount = get_next_sequence_number(path, basename)
fullfn = None
- fullfn_without_extension = None
for i in range(500):
fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
fullfn = os.path.join(path, f"{fn}{file_decoration}.{extension}")
- fullfn_without_extension = os.path.join(path, f"{fn}{file_decoration}")
if not os.path.exists(fullfn):
break
else:
fullfn = os.path.join(path, f"{file_decoration}.{extension}")
- fullfn_without_extension = os.path.join(path, file_decoration)
else:
fullfn = os.path.join(path, f"{forced_filename}.{extension}")
- fullfn_without_extension = os.path.join(path, forced_filename)
+
+ pnginfo = existing_info or {}
+ if info is not None:
+ pnginfo[pnginfo_section_name] = info
+
+ params = script_callbacks.ImageSaveParams(image, p, fullfn, pnginfo)
+ script_callbacks.before_image_saved_callback(params)
+
+ image = params.image
+ fullfn = params.filename
+ info = params.pnginfo.get(pnginfo_section_name, None)
+ fullfn_without_extension, extension = os.path.splitext(params.filename)
def exif_bytes():
return piexif.dump({
@@ -513,12 +508,20 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i },
})
- if extension.lower() in ("jpg", "jpeg", "webp"):
+ if extension.lower() == '.png':
+ pnginfo_data = PngImagePlugin.PngInfo()
+ for k, v in params.pnginfo.items():
+ pnginfo_data.add_text(k, str(v))
+
+ image.save(fullfn, quality=opts.jpeg_quality, pnginfo=pnginfo_data)
+
+ elif extension.lower() in (".jpg", ".jpeg", ".webp"):
image.save(fullfn, quality=opts.jpeg_quality)
+
if opts.enable_pnginfo and info is not None:
piexif.insert(exif_bytes(), fullfn)
else:
- image.save(fullfn, quality=opts.jpeg_quality, pnginfo=pnginfo)
+ image.save(fullfn, quality=opts.jpeg_quality)
target_side_length = 4000
oversize = image.width > target_side_length or image.height > target_side_length
@@ -541,7 +544,8 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i else:
txt_fullfn = None
- script_callbacks.image_saved_callback(image, p, fullfn, txt_fullfn)
+ script_callbacks.image_saved_callback(params)
+
return fullfn, txt_fullfn
diff --git a/modules/img2img.py b/modules/img2img.py index 8d9f7cf9..efda26e1 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -19,7 +19,7 @@ import modules.scripts def process_batch(p, input_dir, output_dir, args):
processing.fix_seed(p)
- images = [file for file in [os.path.join(input_dir, x) for x in os.listdir(input_dir)] if os.path.isfile(file)]
+ images = shared.listfiles(input_dir)
print(f"Will process {len(images)} images, creating {p.n_iter * p.batch_size} new images for each.")
@@ -39,6 +39,8 @@ def process_batch(p, input_dir, output_dir, args): break
img = Image.open(image)
+ # Use the EXIF orientation of photos taken by smartphones.
+ img = ImageOps.exif_transpose(img)
p.init_images = [img] * p.batch_size
proc = modules.scripts.scripts_img2img.run(p, *args)
@@ -61,19 +63,25 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro is_batch = mode == 2
if is_inpaint:
+ # Drawn mask
if mask_mode == 0:
image = init_img_with_mask['image']
mask = init_img_with_mask['mask']
alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1')
mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L')
image = image.convert('RGB')
+ # Uploaded mask
else:
image = init_img_inpaint
mask = init_mask_inpaint
+ # No mask
else:
image = init_img
mask = None
+ # Use the EXIF orientation of photos taken by smartphones.
+ image = ImageOps.exif_transpose(image)
+
assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
p = StableDiffusionProcessingImg2Img(
diff --git a/modules/processing.py b/modules/processing.py index c61bbfbd..548eec29 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -77,9 +77,8 @@ def get_correct_sampler(p): class StableDiffusionProcessing():
"""
The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing
-
"""
- def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str="", styles: List[str]=None, seed: int=-1, subseed: int=-1, subseed_strength: float=0, seed_resize_from_h: int=-1, seed_resize_from_w: int=-1, seed_enable_extras: bool=True, sampler_index: int=0, batch_size: int=1, n_iter: int=1, steps:int =50, cfg_scale:float=7.0, width:int=512, height:int=512, restore_faces:bool=False, tiling:bool=False, do_not_save_samples:bool=False, do_not_save_grid:bool=False, extra_generation_params: Dict[Any,Any]=None, overlay_images: Any=None, negative_prompt: str=None, eta: float =None, do_not_reload_embeddings: bool=False, denoising_strength: float = 0, ddim_discretize: str = "uniform", s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0):
+ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_index: int = 0, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None):
self.sd_model = sd_model
self.outpath_samples: str = outpath_samples
self.outpath_grids: str = outpath_grids
@@ -109,13 +108,14 @@ class StableDiffusionProcessing(): self.do_not_reload_embeddings = do_not_reload_embeddings
self.paste_to = None
self.color_corrections = None
- self.denoising_strength: float = 0
+ self.denoising_strength: float = denoising_strength
self.sampler_noise_scheduler_override = None
- self.ddim_discretize = opts.ddim_discretize
+ self.ddim_discretize = ddim_discretize or opts.ddim_discretize
self.s_churn = s_churn or opts.s_churn
self.s_tmin = s_tmin or opts.s_tmin
self.s_tmax = s_tmax or float('inf') # not representable as a standard ui option
self.s_noise = s_noise or opts.s_noise
+ self.override_settings = {k: v for k, v in (override_settings or {}).items() if k not in shared.restricted_opts}
if not seed_enable_extras:
self.subseed = -1
@@ -129,6 +129,72 @@ class StableDiffusionProcessing(): self.all_seeds = None
self.all_subseeds = None
def txt2img_image_conditioning(self, x, width=None, height=None):
    """Build the ``image_conditioning`` tensor passed to the sampler for txt2img.

    When the sampler's ``conditioning_key`` is neither ``'hybrid'`` nor
    ``'concat'`` a zero tensor of shape ``(x.shape[0], 5, 1, 1)`` with ``x``'s
    dtype and device is returned and the first-stage encoder is not called.

    Otherwise an all-zero 3-channel image of ``height`` x ``width`` is encoded
    with the first-stage model and one extra leading channel filled with 1.0
    is prepended to the result.

    Args:
        x: noise/latent tensor; its batch size, dtype and device are reused.
        width: image width to encode; falsy values fall back to ``self.width``.
        height: image height to encode; falsy values fall back to ``self.height``.
    """
    wants_image_conditioning = self.sampler.conditioning_key in {'hybrid', 'concat'}
    batch_size = x.shape[0]

    if not wants_image_conditioning:
        # Placeholder only: no encoder call, just something with the right batch size.
        return torch.zeros(batch_size, 5, 1, 1, dtype=x.dtype, device=x.device)

    target_height = height or self.height
    target_width = width or self.width

    # The whole picture counts as masked, so the "masked image" is all zeros.
    blank_image = torch.zeros(batch_size, 3, target_height, target_width, device=x.device)
    encoded = self.sd_model.encode_first_stage(blank_image)
    latent = self.sd_model.get_first_stage_encoding(encoded)

    # Pad spec (0, 0, 0, 0, 1, 0) adds a single channel in front, filled with ones (the mask).
    with_mask = torch.nn.functional.pad(latent, (0, 0, 0, 0, 1, 0), value=1.0)
    return with_mask.to(x.dtype)
+
def img2img_image_conditioning(self, source_image, latent_image, image_mask = None):
    """Build the ``image_conditioning`` tensor for img2img / inpainting.

    When the sampler's ``conditioning_key`` is neither ``'hybrid'`` nor
    ``'concat'`` a zero tensor of shape ``(latent_image.shape[0], 5, 1, 1)``
    with the latent's dtype and device is returned.

    Otherwise the source image is blended towards its masked version, encoded
    with the first-stage model, and concatenated (mask first) with the mask
    resized to the latent's spatial size.

    Args:
        source_image: image tensor to be conditioned on.
        latent_image: latent tensor; supplies batch size and spatial size.
        image_mask: ``None`` (a mask of ones is used), a tensor (used as is),
            or an object with ``.convert("L")`` whose pixels are scaled to
            0..1 and rounded to 0.0 / 1.0.
    """
    if self.sampler.conditioning_key not in {'hybrid', 'concat'}:
        # Dummy zero conditioning if we're not using inpainting model.
        return torch.zeros(
            latent_image.shape[0], 5, 1, 1,
            dtype=latent_image.dtype,
            device=latent_image.device
        )

    # Pick the mask according to the kind of input we were given.
    if image_mask is None:
        conditioning_mask = torch.ones(1, 1, *source_image.shape[-2:])
    elif torch.is_tensor(image_mask):
        conditioning_mask = image_mask
    else:
        greyscale = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
        # Inpainting model uses a discretized mask as input, so round to 1.0 or 0.0.
        conditioning_mask = torch.round(torch.from_numpy(greyscale[None, None]))

    conditioning_mask = conditioning_mask.to(source_image.device)

    # Interpolate between the untouched source and the source with the masked
    # area zeroed out; the weight is a per-object override or the global option.
    fully_masked = source_image * (1.0 - conditioning_mask)
    mask_weight = getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight)
    conditioning_image = torch.lerp(source_image, fully_masked, mask_weight)

    # Encode the blended image using first stage of network.
    encoded = self.sd_model.encode_first_stage(conditioning_image)
    conditioning_image = self.sd_model.get_first_stage_encoding(encoded)

    # Assemble the tensor to be fed to `c_concat`: resized mask followed by the latent.
    resized_mask = torch.nn.functional.interpolate(conditioning_mask, size=latent_image.shape[-2:])
    resized_mask = resized_mask.expand(conditioning_image.shape[0], -1, -1, -1)
    combined = torch.cat([resized_mask, conditioning_image], dim=1)
    return combined.to(shared.device).type(self.sd_model.dtype)
def init(self, all_prompts, all_seeds, all_subseeds):
pass
@@ -351,6 +417,22 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration def process_images(p: StableDiffusionProcessing) -> Processed:
+ stored_opts = {k: opts.data[k] for k in p.override_settings.keys()}
+
+ try:
+ for k, v in p.override_settings.items():
+ opts.data[k] = v # we don't call onchange for simplicity which makes changing model, hypernet impossible
+
+ res = process_images_inner(p)
+
+ finally:
+ for k, v in stored_opts.items():
+ opts.data[k] = v
+
+ return res
+
+
+def process_images_inner(p: StableDiffusionProcessing) -> Processed:
"""this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
if type(p.prompt) == list:
@@ -556,37 +638,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f
self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f
- def create_dummy_mask(self, x, width=None, height=None):
- if self.sampler.conditioning_key in {'hybrid', 'concat'}:
- height = height or self.height
- width = width or self.width
-
- # The "masked-image" in this case will just be all zeros since the entire image is masked.
- image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
- image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
-
- # Add the fake full 1s mask to the first dimension.
- image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
- image_conditioning = image_conditioning.to(x.dtype)
-
- else:
- # Dummy zero conditioning if we're not using inpainting model.
- # Still takes up a bit of memory, but no encoder call.
- # Pretty sure we can just make this a 1x1 image since its not going to be used besides its batch size.
- image_conditioning = torch.zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
-
- return image_conditioning
-
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
if not self.enable_hr:
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x))
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
return samples
x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, self.firstphase_width, self.firstphase_height))
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x, self.firstphase_width, self.firstphase_height))
samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
@@ -623,7 +684,12 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): x = None
devices.torch_gc()
- samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps, image_conditioning=self.create_dummy_mask(samples))
+ image_conditioning = self.img2img_image_conditioning(
+ decoded_samples,
+ samples,
+ decoded_samples.new_ones(decoded_samples.shape[0], 1, decoded_samples.shape[2], decoded_samples.shape[3])
+ )
+ samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps, image_conditioning=image_conditioning)
return samples
@@ -755,33 +821,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): elif self.inpainting_fill == 3:
self.init_latent = self.init_latent * self.mask
- if self.sampler.conditioning_key in {'hybrid', 'concat'}:
- if self.image_mask is not None:
- conditioning_mask = np.array(self.image_mask.convert("L"))
- conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
- conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
-
- # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
- conditioning_mask = torch.round(conditioning_mask)
- else:
- conditioning_mask = torch.ones(1, 1, *image.shape[-2:])
-
- # Create another latent image, this time with a masked version of the original input.
- conditioning_mask = conditioning_mask.to(image.device)
- conditioning_image = image * (1.0 - conditioning_mask)
- conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
-
- # Create the concatenated conditioning tensor to be fed to `c_concat`
- conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:])
- conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1)
- self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1)
- self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype)
- else:
- self.image_conditioning = torch.zeros(
- self.init_latent.shape[0], 5, 1, 1,
- dtype=self.init_latent.dtype,
- device=self.init_latent.device
- )
+ self.image_conditioning = self.img2img_image_conditioning(image, self.init_latent, self.image_mask)
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index dc520abc..6ea58d61 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -9,15 +9,34 @@ def report_exception(c, job): print(traceback.format_exc(), file=sys.stderr)
class ImageSaveParams:
    """Values describing a single image save, passed to the image-save callbacks.

    Attributes:
        image: the PIL image itself
        p: p object with processing parameters; either StableDiffusionProcessing
            or an object with same fields
        filename: name of file that the image would be saved to
        pnginfo: dictionary with parameters for image's PNG info data; infotext
            will have the key 'parameters'
    """

    def __init__(self, image, p, filename, pnginfo):
        self.image, self.p = image, p
        self.filename, self.pnginfo = filename, pnginfo
+
+
ScriptCallback = namedtuple("ScriptCallback", ["script", "callback"])
callbacks_model_loaded = []
callbacks_ui_tabs = []
callbacks_ui_settings = []
+callbacks_before_image_saved = []
callbacks_image_saved = []
+
def clear_callbacks():
    """Empty every module-level callback list in place."""
    registries = (
        callbacks_model_loaded,
        callbacks_ui_tabs,
        callbacks_ui_settings,
        callbacks_before_image_saved,
        callbacks_image_saved,
    )
    for registry in registries:
        registry.clear()
@@ -49,10 +68,18 @@ def ui_settings_callback(): report_exception(c, 'ui_settings_callback')
-def image_saved_callback(image, p, fullfn, txt_fullfn):
def before_image_saved_callback(params: ImageSaveParams):
    """Run every callback registered for the "before image saved" event.

    Args:
        params: the ImageSaveParams for the image about to be written; the
            same object is handed to each callback in registration order.

    A callback that raises does not stop the remaining callbacks: the
    exception is passed to report_exception and iteration continues.
    """
    # Iterate the *before*-save registry. Walking callbacks_image_saved here would
    # skip every hook added via on_before_image_saved and would invoke the
    # after-save hooks ahead of the actual save.
    for c in callbacks_before_image_saved:
        try:
            c.callback(params)
        except Exception:
            report_exception(c, 'before_image_saved_callback')
+
+
def image_saved_callback(params: ImageSaveParams):
    """Invoke each entry of callbacks_image_saved with ``params``.

    An exception raised by one callback is handed to report_exception and the
    loop carries on with the next entry.
    """
    for registered in callbacks_image_saved:
        try:
            registered.callback(params)
        except Exception:
            report_exception(registered, 'image_saved_callback')
@@ -64,7 +91,6 @@ def add_callback(callbacks, fun): callbacks.append(ScriptCallback(filename, fun))
-
def on_model_loaded(callback):
"""register a function to be called when the stable diffusion model is created; the model is
passed as an argument"""
@@ -90,11 +116,17 @@ def on_ui_settings(callback): add_callback(callbacks_ui_settings, callback)
-def on_save_imaged(callback):
- """register a function to be called after modules.images.save_image is called.
- The callback is called with three arguments:
- - p - procesing object (or a dummy object with same fields if the image is saved using save button)
- - fullfn - image filename
- - txt_fullfn - text file with parameters; may be None
def on_before_image_saved(callback):
    """Register ``callback`` to run before an image is written to a file.

    The callback receives a single argument:
      - params: ImageSaveParams - the parameters the image is going to be
        saved with; fields of this object may be modified by the callback.
    """
    add_callback(callbacks_before_image_saved, callback)
+
+
def on_image_saved(callback):
    """Register ``callback`` to run after an image has been written to a file.

    The callback receives a single argument:
      - params: ImageSaveParams - the parameters the image was saved with;
        modifying fields of this object has no effect.
    """
    add_callback(callbacks_image_saved, callback)
diff --git a/modules/sd_models.py b/modules/sd_models.py index e697bb72..64d5ee0d 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -3,6 +3,7 @@ import os.path import sys
from collections import namedtuple
import torch
+import re
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
@@ -35,8 +36,10 @@ def setup_model(): list_models()
-def checkpoint_tiles():
- return sorted([x.title for x in checkpoints_list.values()])
def checkpoint_tiles():
    """Return the titles of all entries in ``checkpoints_list`` in natural order.

    Titles are compared case-insensitively and runs of ASCII digits are
    compared as numbers, so "model2" sorts before "model10".
    """
    def natural_key(title):
        # re.split with a capturing group alternates text / digit-run / text ...,
        # so the captured digit runs always sit at odd indices. Converting by
        # position rather than by str.isdigit() keeps non-ASCII digit characters
        # (e.g. "²"), which land in the text chunks, from reaching int().
        return [
            int(chunk) if index % 2 else chunk.lower()
            for index, chunk in enumerate(re.split('([0-9]+)', title))
        ]

    return sorted((info.title for info in checkpoints_list.values()), key=natural_key)
def list_models():
diff --git a/modules/shared.py b/modules/shared.py index 308fccce..5d1ceb85 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -82,9 +82,10 @@ parser.add_argument("--api", action='store_true', help="use api=True to launch t parser.add_argument("--nowebui", action='store_true', help="use api=True to launch the api instead of the webui")
parser.add_argument("--ui-debug-mode", action='store_true', help="Don't load model to quickly launch UI")
parser.add_argument("--device-id", type=str, help="Select the default CUDA device to use (export CUDA_VISIBLE_DEVICES=0,1,etc might be needed before)", default=None)
+parser.add_argument("--administrator", action='store_true', help="Administrator rights", default=False)
cmd_opts = parser.parse_args()
-restricted_opts = [
+restricted_opts = {
"samples_filename_pattern",
"directories_filename_pattern",
"outdir_samples",
@@ -94,7 +95,7 @@ restricted_opts = [ "outdir_grids",
"outdir_txt2img_grids",
"outdir_save",
-]
+}
devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_swinir, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \
(devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'swinir', 'esrgan', 'scunet', 'codeformer'])
@@ -267,6 +268,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "sd_checkpoint_cache": OptionInfo(0, "Checkpoints to cache in RAM", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
"sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks),
"sd_hypernetwork_strength": OptionInfo(1.0, "Hypernetwork strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.001}),
+ "inpainting_mask_weight": OptionInfo(1.0, "Inpainting conditioning mask strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."),
"save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"),
"img2img_fix_steps": OptionInfo(False, "With img2img, do exactly the amount of steps the slider specifies (normally you'd do less with less denoising)."),
@@ -278,6 +280,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "filter_nsfw": OptionInfo(False, "Filter NSFW content"),
'CLIP_stop_at_last_layers': OptionInfo(1, "Stop At last layers of CLIP model", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
"random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}),
+ "send_seed": OptionInfo(False, "Send seed when sending prompt or image to other interface"),
}))
options_templates.update(options_section(('interrogate', "Interrogate Options"), {
@@ -449,3 +452,8 @@ total_tqdm = TotalTQDM() mem_mon = modules.memmon.MemUsageMonitor("MemMon", device, opts)
mem_mon.start()
+
+
def listfiles(dirname):
    """Return the paths of the regular, non-hidden files directly inside *dirname*.

    Entries are ordered by file name; names starting with a dot and anything
    that is not a regular file (e.g. sub-directories) are left out.
    """
    visible_names = (name for name in sorted(os.listdir(dirname)) if not name.startswith("."))
    candidates = (os.path.join(dirname, name) for name in visible_names)
    return [path for path in candidates if os.path.isfile(path)]
diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py new file mode 100644 index 00000000..9859974a --- /dev/null +++ b/modules/textual_inversion/autocrop.py @@ -0,0 +1,341 @@ +import cv2
+import requests
+import os
+from collections import defaultdict
+from math import log, sqrt
+import numpy as np
+from PIL import Image, ImageDraw
+
+GREEN = "#0F0"
+BLUE = "#00F"
+RED = "#F00"
+
+
def crop_image(im, settings):
    """Intelligently crop an image to the subject matter.

    The image is resized so that it fully covers the requested crop size,
    then a ``crop_width`` x ``crop_height`` window is centred on the focal
    point and shifted back inside the frame where necessary.

    Args:
        im: PIL image to crop.
        settings: Object providing ``crop_width``, ``crop_height``,
            ``annotate_image`` and ``destop_view_image`` (plus whatever
            ``focal_point`` reads from it).

    Returns:
        A list whose first element is the cropped image.  When
        ``settings.annotate_image`` is set, the annotated debug copy of the
        resized image is appended as a second element.
    """
    # Scale by the larger of the two ratios so BOTH sides cover the crop.
    # Picking the ratio from the image orientation alone leaves the image
    # smaller than the crop whenever the crop is "more landscape" (or "more
    # portrait") than the image itself.
    scale_by = max(settings.crop_width / im.width, settings.crop_height / im.height)

    # int() may land one pixel short of the target because of float rounding;
    # never let the resized image be smaller than the crop window.
    scaled_width = max(int(im.width * scale_by), settings.crop_width)
    scaled_height = max(int(im.height * scale_by), settings.crop_height)

    im = im.resize((scaled_width, scaled_height))
    im_debug = im.copy()

    focus = focal_point(im_debug, settings)

    # Centre the crop window over the focal point, then slide it back into
    # the frame if it pokes out on either side.
    x_half = int(settings.crop_width / 2)
    y_half = int(settings.crop_height / 2)

    x1 = min(max(focus.x - x_half, 0), im.width - settings.crop_width)
    y1 = min(max(focus.y - y_half, 0), im.height - settings.crop_height)
    x2 = x1 + settings.crop_width
    y2 = y1 + settings.crop_height

    crop = [x1, y1, x2, y2]

    results = [im.crop(tuple(crop))]

    if settings.annotate_image:
        d = ImageDraw.Draw(im_debug)
        # The outline is drawn on the last pixel inside the crop window.
        rect = list(crop)
        rect[2] -= 1
        rect[3] -= 1
        d.rectangle(rect, outline=GREEN)
        results.append(im_debug)
        if settings.destop_view_image:
            im_debug.show()

    return results
+
def focal_point(im, settings):
    """Combine the enabled detectors into a single focal point for *im*.

    Each detector (corners, entropy, faces) is run only when its weight in
    *settings* is greater than zero.  The centroid of every detector that
    found at least one point is weighted by that detector's share of the
    total weight of the detectors that produced results, and the weighted
    average of those centroids is returned.

    When ``settings.annotate_image`` is set, the centroids, the individual
    points and the final focal point are drawn onto *im* in place.

    Args:
        im: PIL image to analyse (and annotate, if requested).
        settings: Object providing the three ``*_points_weight`` values and
            ``annotate_image``.

    Returns:
        A ``PointOfInterest`` for the focal point.  If no detector produced
        any point, the centre of the image is returned instead of failing
        with a division by zero.
    """
    detectors = [
        ("Edge", BLUE, settings.corner_points_weight, image_corner_points),
        ("Entropy", "#ff0", settings.entropy_points_weight, image_entropy_points),
        ("Face", RED, settings.face_points_weight, image_face_points),
    ]

    # Run every detector before anything is drawn so that annotations never
    # influence detection.
    found = []
    for label, color, weight, detect in detectors:
        points = detect(im, settings) if weight > 0 else []
        if len(points) > 0:
            found.append((label, color, weight, points))

    # Only detectors that actually found something share the total weight.
    weight_pref_total = sum(weight for _, _, weight, _ in found)

    pois = []
    annotated = []
    for label, color, weight, points in found:
        poi = centroid(points)
        poi.weight = weight / weight_pref_total
        pois.append(poi)
        annotated.append((label, color, points, poi))

    if pois:
        average_point = poi_average(pois, settings)
    else:
        # Nothing detected (all weights zero, or no features in the image):
        # fall back to the image centre.
        average_point = PointOfInterest(im.width // 2, im.height // 2)

    if settings.annotate_image:
        d = ImageDraw.Draw(im)
        max_size = min(im.width, im.height) * 0.07
        for label, color, points, poi in annotated:
            box = poi.bounding(max_size * poi.weight)
            d.text((box[0], box[1]-15), "%s: %.02f" % (label, poi.weight), fill=color)
            d.ellipse(box, outline=color)
            if len(points) > 1:
                for f in points:
                    d.rectangle(f.bounding(4), outline=color)

        d.ellipse(average_point.bounding(max_size), outline=GREEN)

    return average_point
+
+
def image_face_points(im, settings):
    """Detect faces in *im* and return them as points of interest.

    When ``settings.dnn_model_path`` is set, the OpenCV ``FaceDetectorYN``
    model at that path is used.  Otherwise a series of Haar cascades is
    tried in order and the detections of the first cascade that finds
    anything are returned.

    Args:
        im: PIL image to analyse.
        settings: Object providing ``dnn_model_path``.

    Returns:
        A list of ``PointOfInterest`` (possibly empty); the points share a
        total weight of 1.
    """
    if settings.dnn_model_path is not None:
        detector = cv2.FaceDetectorYN.create(
            settings.dnn_model_path,
            "",
            (im.width, im.height),
            0.9,  # score threshold
            0.3,  # nms threshold
            5000  # keep top k before nms
        )
        # NOTE(review): the array is passed in PIL channel order, unchanged
        # from the original code -- confirm the channel order the model expects.
        faces = detector.detect(np.array(im))
        results = []
        if faces[1] is not None:
            for face in faces[1]:
                x = face[0]
                y = face[1]
                w = face[2]
                h = face[3]
                results.append(
                    PointOfInterest(
                        int(x + (w * 0.5)),   # face focus left/right is center
                        int(y + (h * 0.33)),  # face focus up/down is close to the top of the head
                        size=w,
                        weight=1/len(faces[1])
                    )
                )
        return results

    # PIL delivers RGB data, so convert with the RGB code (the BGR code
    # swaps the red and blue luminance weights).  Normalising to "RGB" first
    # also keeps cvtColor from failing on RGBA / greyscale / palette images.
    gray = cv2.cvtColor(np.array(im.convert("RGB")), cv2.COLOR_RGB2GRAY)

    cascade_dir = cv2.data.haarcascades
    # (cascade file, minimum detection size as a fraction of the smallest side)
    tries = [
        [f'{cascade_dir}haarcascade_eye.xml', 0.01],
        [f'{cascade_dir}haarcascade_frontalface_default.xml', 0.05],
        [f'{cascade_dir}haarcascade_profileface.xml', 0.05],
        [f'{cascade_dir}haarcascade_frontalface_alt.xml', 0.05],
        [f'{cascade_dir}haarcascade_frontalface_alt2.xml', 0.05],
        [f'{cascade_dir}haarcascade_frontalface_alt_tree.xml', 0.05],
        [f'{cascade_dir}haarcascade_eye_tree_eyeglasses.xml', 0.05],
        [f'{cascade_dir}haarcascade_upperbody.xml', 0.05]
    ]
    smallest_side = min(im.width, im.height)
    for cascade_file, min_fraction in tries:
        classifier = cv2.CascadeClassifier(cascade_file)
        minsize = int(smallest_side * min_fraction)  # at least N percent of the smallest side
        try:
            faces = classifier.detectMultiScale(gray, scaleFactor=1.1,
                minNeighbors=7, minSize=(minsize, minsize), flags=cv2.CASCADE_SCALE_IMAGE)
        except Exception:
            # Best effort: a cascade that fails is skipped, but Ctrl-C and
            # interpreter exit are no longer swallowed.
            continue

        if len(faces) > 0:
            share = 1/len(faces)
            # Each detection is (x, y, w, h); focus on the centre of the box.
            return [PointOfInterest(x + w // 2, y + h // 2, size=w, weight=share) for x, y, w, h in faces]
    return []
+
+
def image_corner_points(im, settings):
    """Return up to 100 corner features of *im* as equally weighted points of interest.

    Corners are detected on a greyscale copy of the image; an empty list is
    returned when the detector finds nothing.
    """
    gray_im = im.convert("L")

    # naive attempt at preventing focal points from collecting at watermarks near the bottom:
    # paint the lowest 10% of the greyscale copy a flat grey
    ImageDraw.Draw(gray_im).rectangle([0, im.height*.9, im.width, im.height], fill="#999")

    corners = cv2.goodFeaturesToTrack(
        np.array(gray_im),
        maxCorners=100,
        qualityLevel=0.04,
        minDistance=min(gray_im.width, gray_im.height)*0.06,
        useHarrisDetector=False,
    )
    if corners is None:
        return []

    share = 1/len(corners)
    coordinates = (corner.ravel() for corner in corners)
    return [PointOfInterest(x, y, size=4, weight=share) for x, y in coordinates]
+
+
def image_entropy_points(im, settings):
    """Find the crop-sized window with the highest entropy along the long side.

    A ``crop_width`` x ``crop_height`` window is slid in 4-pixel steps along
    the longer axis of *im* and scored with ``image_entropy``.

    Args:
        im: PIL image to analyse.
        settings: Object providing ``crop_width`` and ``crop_height``.

    Returns:
        A one-element list holding the centre of the best window, or an
        empty list for square images (there is no long side to slide along).
    """
    if im.height < im.width:
        # landscape: move the window's left/right edges
        move_idx = [0, 2]
        move_max = im.size[0]
    elif im.height > im.width:
        # portrait: move the window's top/bottom edges
        move_idx = [1, 3]
        move_max = im.size[1]
    else:
        return []

    e_max = 0
    crop_current = [0, 0, settings.crop_width, settings.crop_height]
    # Copy, do not alias: crop_current is mutated below, and an alias would
    # silently drift to the LAST window whenever no window scores above e_max.
    crop_best = list(crop_current)
    while crop_current[move_idx[1]] < move_max:
        e = image_entropy(im.crop(tuple(crop_current)))

        if e > e_max:
            e_max = e
            crop_best = list(crop_current)

        crop_current[move_idx[0]] += 4
        crop_current[move_idx[1]] += 4

    x_mid = int(crop_best[0] + settings.crop_width/2)
    y_mid = int(crop_best[1] + settings.crop_height/2)

    return [PointOfInterest(x_mid, y_mid, size=25, weight=1.0)]
+
+
def image_entropy(im):
    """Return the Shannon entropy (in bits) of the 1-bit version of *im*.

    The image is converted to mode ``"1"`` and the entropy of the resulting
    pixel-value histogram, ``-sum(p * log2(p))``, is returned.  A uniform
    (or empty) image scores 0; an even black/white split scores 1.

    Args:
        im: Object with a PIL-style ``convert(mode)`` method.

    Returns:
        The entropy as a float.
    """
    band = np.asarray(im.convert("1"), dtype=np.uint8)
    # One bin per possible uint8 value.
    hist, _ = np.histogram(band, bins=256, range=(0, 256))
    hist = hist[hist > 0]
    if hist.size == 0:
        return 0.0
    probabilities = hist / hist.sum()
    # Each log-probability must be weighted by its probability; the plain
    # sum of -log2(p) rewards windows that are almost uniform.
    return float(-(probabilities * np.log2(probabilities)).sum())
+
def centroid(pois):
    """Return a PointOfInterest at the unweighted mean position of *pois*."""
    count = len(pois)
    total_x = sum([poi.x for poi in pois])
    total_y = sum([poi.y for poi in pois])
    return PointOfInterest(total_x/count, total_y/count)
+
+
def poi_average(pois, settings):
    """Return the weighted average position of *pois*, rounded to whole pixels.

    *settings* is accepted for signature compatibility and is not read.
    """
    total_weight = 0.0
    weighted_x = 0.0
    weighted_y = 0.0
    for point in pois:
        point_weight = point.weight
        total_weight += point_weight
        weighted_x += point.x * point_weight
        weighted_y += point.y * point_weight

    return PointOfInterest(round(weighted_x / total_weight), round(weighted_y / total_weight))
+
+
def is_landscape(w, h):
    """Return True when width *w* is strictly greater than height *h*."""
    return h < w
+
+
def is_portrait(w, h):
    """Return True when height *h* is strictly greater than width *w*."""
    return w < h
+
+
def is_square(w, h):
    """Return True when width *w* and height *h* are equal."""
    return h == w
+
+
def download_and_cache_models(dirname):
    """Return the path of the cached face detection model, downloading it if needed.

    Args:
        dirname: Directory in which the model is cached; created if missing.

    Returns:
        The path of the cached model file, or ``None`` if it does not exist
        after the download attempt.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an error status.
        OSError: If the directory or the file cannot be written.
    """
    download_url = 'https://github.com/opencv/opencv_zoo/blob/91fb0290f50896f38a0ab1e558b74b16bc009428/models/face_detection_yunet/face_detection_yunet_2022mar.onnx?raw=true'
    model_file_name = 'face_detection_yunet.onnx'

    os.makedirs(dirname, exist_ok=True)

    cache_file = os.path.join(dirname, model_file_name)
    if not os.path.exists(cache_file):
        print(f"downloading face detection model from '{download_url}' to '{cache_file}'")
        # A timeout keeps a dead connection from hanging forever.
        response = requests.get(download_url, timeout=60)
        # Without this check an HTTP error page would be cached as the model
        # and every later run would reuse the broken file.
        response.raise_for_status()

        # Write to a temporary name and rename, so an interrupted write never
        # leaves a truncated file under the final cache name.
        partial_file = cache_file + ".part"
        try:
            with open(partial_file, "wb") as f:
                f.write(response.content)
            os.replace(partial_file, cache_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)

    if os.path.exists(cache_file):
        return cache_file
    return None
+
+
class PointOfInterest:
    """A weighted point in image coordinates.

    Attributes are plain and may be reassigned after construction.
    """

    def __init__(self, x, y, weight=1.0, size=10):
        self.x = x            # horizontal position
        self.y = y            # vertical position
        self.weight = weight  # relative importance of this point
        self.size = size      # nominal extent; default for bounding()

    def __repr__(self):
        return f"{type(self).__name__}(x={self.x!r}, y={self.y!r}, weight={self.weight!r}, size={self.size!r})"

    def bounding(self, size=None):
        """Return ``[left, top, right, bottom]`` of a box centred on the point.

        Args:
            size: Side length of the box.  Defaults to the point's own
                ``size`` when omitted.
        """
        if size is None:
            size = self.size
        half = size // 2
        return [
            self.x - half,
            self.y - half,
            self.x + half,
            self.y + half
        ]
+
+
class Settings:
    """Options controlling the automatic focal-point crop.

    Args:
        crop_width: Width of the crop.
        crop_height: Height of the crop.
        corner_points_weight: Weight of the corner detector; it is skipped
            when the weight is not greater than zero.
        entropy_points_weight: Weight of the entropy detector (same rule).
        face_points_weight: Weight of the face detector (same rule).
        annotate_image: Whether a debug image with annotations is produced.
        dnn_model_path: Path of the DNN face detection model, or ``None``
            to use the Haar cascade fallback.
    """

    def __init__(self, crop_width=512, crop_height=512, corner_points_weight=0.5, entropy_points_weight=0.5, face_points_weight=0.5, annotate_image=False, dnn_model_path=None):
        self.crop_width = crop_width
        self.crop_height = crop_height
        self.corner_points_weight = corner_points_weight
        self.entropy_points_weight = entropy_points_weight
        self.face_points_weight = face_points_weight
        self.annotate_image = annotate_image
        # Misspelled legacy name ("destop"), kept because existing code reads
        # it; use the ``desktop_view_image`` alias in new code.
        self.destop_view_image = False
        self.dnn_model_path = dnn_model_path

    @property
    def desktop_view_image(self):
        """Correctly spelled alias of the legacy ``destop_view_image`` flag."""
        return self.destop_view_image

    @desktop_view_image.setter
    def desktop_view_image(self, value):
        self.destop_view_image = value
\ No newline at end of file diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 5b1c5002..8bb00d27 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -86,12 +86,12 @@ class PersonalizedBase(Dataset): assert len(self.dataset) > 0, "No images have been found in the dataset."
self.length = len(self.dataset) * repeats // batch_size
- self.initial_indexes = np.arange(len(self.dataset))
+ self.dataset_length = len(self.dataset)
self.indexes = None
self.shuffle()
def shuffle(self):
- self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0]).numpy()]
+ self.indexes = np.random.permutation(self.dataset_length)
def create_text(self, filename_text):
text = random.choice(self.lines)
diff --git a/modules/textual_inversion/learn_schedule.py b/modules/textual_inversion/learn_schedule.py index 2062726a..3a736065 100644 --- a/modules/textual_inversion/learn_schedule.py +++ b/modules/textual_inversion/learn_schedule.py @@ -52,7 +52,7 @@ class LearnRateScheduler: self.finished = False
def apply(self, optimizer, step_number):
- if step_number <= self.end_step:
+ if step_number < self.end_step:
return
try:
diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 33eaddb6..e13b1894 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,12 +7,14 @@ import tqdm import time
from modules import shared, images
+from modules.paths import models_path
from modules.shared import opts, cmd_opts
+from modules.textual_inversion import autocrop
if cmd_opts.deepdanbooru:
import modules.deepbooru as deepbooru
-def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2):
+def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False):
try:
if process_caption:
shared.interrogator.load()
@@ -22,7 +24,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)
- preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio)
+ preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug)
finally:
@@ -34,7 +36,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce -def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2):
+def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False):
width = process_width
height = process_height
src = os.path.abspath(process_src)
@@ -113,6 +115,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre splitted = image.crop((0, y, to_w, y + to_h))
yield splitted
+
for index, imagefile in enumerate(tqdm.tqdm(files)):
subindex = [0]
filename = os.path.join(src, imagefile)
@@ -137,11 +140,36 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre ratio = (img.height * width) / (img.width * height)
inverse_xy = True
+ process_default_resize = True
+
if process_split and ratio < 1.0 and ratio <= split_threshold:
for splitted in split_pic(img, inverse_xy):
save_pic(splitted, index, existing_caption=existing_caption)
- else:
+ process_default_resize = False
+
+ if process_focal_crop and img.height != img.width:
+
+ dnn_model_path = None
+ try:
+ dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv"))
+ except Exception as e:
+ print("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", e)
+
+ autocrop_settings = autocrop.Settings(
+ crop_width = width,
+ crop_height = height,
+ face_points_weight = process_focal_crop_face_weight,
+ entropy_points_weight = process_focal_crop_entropy_weight,
+ corner_points_weight = process_focal_crop_edges_weight,
+ annotate_image = process_focal_crop_debug,
+ dnn_model_path = dnn_model_path,
+ )
+ for focal in autocrop.crop_image(img, autocrop_settings):
+ save_pic(focal, index, existing_caption=existing_caption)
+ process_default_resize = False
+
+ if process_default_resize:
img = images.resize_image(1, img, width, height)
save_pic(img, index, existing_caption=existing_caption)
- shared.state.nextjob()
+ shared.state.nextjob()
\ No newline at end of file diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 529ed3e2..17dfb223 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -10,7 +10,7 @@ import csv from PIL import Image, PngImagePlugin
-from modules import shared, devices, sd_hijack, processing, sd_models
+from modules import shared, devices, sd_hijack, processing, sd_models, images
import modules.textual_inversion.dataset
from modules.textual_inversion.learn_schedule import LearnRateScheduler
@@ -157,6 +157,9 @@ def create_embedding(name, num_vectors_per_token, overwrite_old, init_text='*'): cond_model = shared.sd_model.cond_stage_model
embedding_layer = cond_model.wrapped.transformer.text_model.embeddings
+ with devices.autocast():
+ cond_model([""]) # will send cond model to GPU if lowvram/medvram is active
+
ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"]
embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device)
@@ -164,6 +167,8 @@ def create_embedding(name, num_vectors_per_token, overwrite_old, init_text='*'): for i in range(num_vectors_per_token):
vec[i] = embedded[i * int(embedded.shape[0]) // num_vectors_per_token]
+ # Remove illegal characters from name.
+ name = "".join( x for x in name if (x.isalnum() or x in "._- "))
fn = os.path.join(shared.cmd_opts.embeddings_dir, f"{name}.pt")
if not overwrite_old:
assert not os.path.exists(fn), f"file {fn} already exists"
@@ -179,9 +184,8 @@ def write_loss(log_directory, filename, step, epoch_len, values): if shared.opts.training_write_csv_every == 0:
return
- if step % shared.opts.training_write_csv_every != 0:
+ if (step + 1) % shared.opts.training_write_csv_every != 0:
return
-
write_csv_header = False if os.path.exists(os.path.join(log_directory, filename)) else True
with open(os.path.join(log_directory, filename), "a+", newline='') as fout:
@@ -191,11 +195,11 @@ def write_loss(log_directory, filename, step, epoch_len, values): csv_writer.writeheader()
epoch = step // epoch_len
- epoch_step = step - epoch * epoch_len
+ epoch_step = step % epoch_len
csv_writer.writerow({
"step": step + 1,
- "epoch": epoch + 1,
+ "epoch": epoch,
"epoch_step": epoch_step + 1,
**values,
})
@@ -244,6 +248,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc last_saved_file = "<none>"
last_saved_image = "<none>"
+ forced_filename = "<none>"
embedding_yet_to_be_embedded = False
ititial_step = embedding.step or 0
@@ -276,14 +281,17 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc loss.backward()
optimizer.step()
+ steps_done = embedding.step + 1
epoch_num = embedding.step // len(ds)
- epoch_step = embedding.step - (epoch_num * len(ds)) + 1
+ epoch_step = embedding.step % len(ds)
- pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{len(ds)}]loss: {losses.mean():.7f}")
+ pbar.set_description(f"[Epoch {epoch_num}: {epoch_step+1}/{len(ds)}]loss: {losses.mean():.7f}")
- if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0:
- last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt')
+ if embedding_dir is not None and steps_done % save_embedding_every == 0:
+ # Before saving, change name to match current checkpoint.
+ embedding.name = f'{embedding_name}-{steps_done}'
+ last_saved_file = os.path.join(embedding_dir, f'{embedding.name}.pt')
embedding.save(last_saved_file)
embedding_yet_to_be_embedded = True
@@ -292,9 +300,9 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc "learn_rate": scheduler.learn_rate
})
- if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0:
- last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png')
-
+ if images_dir is not None and steps_done % create_image_every == 0:
+ forced_filename = f'{embedding_name}-{steps_done}'
+ last_saved_image = os.path.join(images_dir, forced_filename)
p = processing.StableDiffusionProcessingTxt2Img(
sd_model=shared.sd_model,
do_not_save_grid=True,
@@ -326,7 +334,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc if save_image_with_stored_embedding and os.path.exists(last_saved_file) and embedding_yet_to_be_embedded:
- last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png')
+ last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{steps_done}.png')
info = PngImagePlugin.PngInfo()
data = torch.load(last_saved_file)
@@ -342,7 +350,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc checkpoint = sd_models.select_checkpoint()
footer_left = checkpoint.model_name
footer_mid = '[{}]'.format(checkpoint.hash)
- footer_right = '{}v {}s'.format(vectorSize, embedding.step)
+ footer_right = '{}v {}s'.format(vectorSize, steps_done)
captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right)
captioned_image = insert_image_data_embed(captioned_image, data)
@@ -350,8 +358,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info)
embedding_yet_to_be_embedded = False
- image.save(last_saved_image)
-
+ last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
last_saved_image += f", prompt: {preview_text}"
shared.state.job_no = embedding.step
@@ -371,6 +378,9 @@ Last saved image: {html.escape(last_saved_image)}<br/> embedding.sd_checkpoint = checkpoint.hash
embedding.sd_checkpoint_name = checkpoint.model_name
embedding.cached_checksum = None
+ # Before saving for the last time, change name back to base name (as opposed to the save_embedding_every step-suffixed naming convention).
+ embedding.name = embedding_name
+ filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding.name}.pt')
embedding.save(filename)
return embedding, filename
diff --git a/modules/ui.py b/modules/ui.py index 03528968..b2e1805f 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1,6 +1,4 @@ -import base64
import html
-import io
import json
import math
import mimetypes
@@ -18,13 +16,8 @@ import gradio as gr import gradio.routes
import gradio.utils
import numpy as np
-import piexif
-import torch
from PIL import Image, PngImagePlugin
-import gradio as gr
-import gradio.utils
-import gradio.routes
from modules import sd_hijack, sd_models, localization, script_callbacks
from modules.paths import script_path
@@ -35,7 +28,7 @@ if cmd_opts.deepdanbooru: from modules.deepbooru import get_deepbooru_tags
import modules.codeformer_model
-import modules.generation_parameters_copypaste
+import modules.generation_parameters_copypaste as parameters_copypaste
import modules.gfpgan_model
import modules.hypernetworks.ui
import modules.ldsr_model
@@ -49,13 +42,11 @@ from modules.sd_hijack import model_hijack from modules.sd_samplers import samplers, samplers_for_img2img
import modules.textual_inversion.ui
import modules.hypernetworks.ui
+from modules.generation_parameters_copypaste import image_from_url_text
# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI
mimetypes.init()
mimetypes.add_type('application/javascript', '.js')
-txt2img_paste_fields = []
-img2img_paste_fields = []
-
if not cmd_opts.share and not cmd_opts.listen:
# fix gradio phoning home
@@ -98,37 +89,11 @@ def plaintext_to_html(text): text = "<p>" + "<br>\n".join([f"{html.escape(x)}" for x in text.split('\n')]) + "</p>"
return text
-
-def image_from_url_text(filedata):
- if type(filedata) == dict and filedata["is_file"]:
- filename = filedata["name"]
- tempdir = os.path.normpath(tempfile.gettempdir())
- normfn = os.path.normpath(filename)
- assert normfn.startswith(tempdir), 'trying to open image file not in temporary directory'
-
- return Image.open(filename)
-
- if type(filedata) == list:
- if len(filedata) == 0:
- return None
-
- filedata = filedata[0]
-
- if filedata.startswith("data:image/png;base64,"):
- filedata = filedata[len("data:image/png;base64,"):]
-
- filedata = base64.decodebytes(filedata.encode('utf-8'))
- image = Image.open(io.BytesIO(filedata))
- return image
-
-
def send_gradio_gallery_to_image(x):
if len(x) == 0:
return None
-
return image_from_url_text(x[0])
-
def save_files(js_data, images, do_make_zip, index):
import csv
filenames = []
@@ -192,7 +157,6 @@ def save_files(js_data, images, do_make_zip, index): return gr.File.update(value=fullfns, visible=True), '', '', plaintext_to_html(f"Saved: {filenames[0]}")
-
def save_pil_to_file(pil_image, dir=None):
use_metadata = False
metadata = PngImagePlugin.PngInfo()
@@ -625,6 +589,82 @@ def create_refresh_button(refresh_component, refresh_method, refreshed_args, ele )
return refresh_button
+def create_output_panel(tabname, outdir):
# Builds the output area of a tab: the result gallery, the buttons created by
# parameters_copypaste.create_buttons, an open-folder button and, for every
# tab except "extras", the Save / zip / download controls.
# Returns a 3-tuple: the gallery, then generation_info (or html_info_x when
# tabname is "extras"), then html_info.
# NOTE(review): the nesting of the `with` blocks below was lost when this diff
# was extracted; confirm it against the real file before editing.
+ def open_folder(f):
# Nothing to open yet: print a notice and return.
+ if not os.path.exists(f):
+ print(f'Folder "{f}" does not exist. After you create an image, the folder will be created.')
+ return
# The path exists but is not a directory: warn on stderr and refuse to open it.
+ elif not os.path.isdir(f):
+ print(f"""
+WARNING
+An open_folder request was made with an argument that is not a folder.
+This could be an error or a malicious attempt to run code on your computer.
+Requested path was: {f}
+""", file=sys.stderr)
+ return
+
# The folder is only opened when hide_ui_dir_config is not set; the opener
# depends on platform.system().
+ if not shared.cmd_opts.hide_ui_dir_config:
+ path = os.path.normpath(f)
+ if platform.system() == "Windows":
+ os.startfile(path)
+ elif platform.system() == "Darwin":
+ sp.Popen(["open", path])
+ else:
+ sp.Popen(["xdg-open", path])
+
+ with gr.Column(variant='panel'):
+ with gr.Group():
+ result_gallery = gr.Gallery(label='Output', show_label=False, elem_id=f"{tabname}_gallery").style(grid=4)
+
# Stays None unless the non-"extras" branch below replaces it with a Textbox.
+ generation_info = None
+ with gr.Column():
+ with gr.Row():
# The "extras" tab gets no Save button.
+ if tabname != "extras":
+ save = gr.Button('Save')
+
+ buttons = parameters_copypaste.create_buttons(["img2img", "inpaint", "extras"])
# With hide_ui_dir_config set the button gets the element id "hidden_element".
+ button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder'
+ open_folder_button = gr.Button(folder_symbol, elem_id=button_id)
+
# opts.outdir_samples, when truthy, takes precedence over this panel's outdir.
+ open_folder_button.click(
+ fn=lambda: open_folder(opts.outdir_samples or outdir),
+ inputs=[],
+ outputs=[],
+ )
+
+ if tabname != "extras":
+ with gr.Row():
+ do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False)
+
+ with gr.Row():
+ download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False)
+
+ with gr.Group():
+ html_info = gr.HTML()
+ generation_info = gr.Textbox(visible=False)
+
# The _js hook forwards the first three inputs unchanged and replaces the
# fourth with selected_gallery_index().
+ save.click(
+ fn=wrap_gradio_call(save_files),
+ _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
+ inputs=[
+ generation_info,
+ result_gallery,
+ do_make_zip,
+ html_info,
+ ],
+ outputs=[
+ download_files,
+ html_info,
+ html_info,
+ html_info,
+ ]
+ )
+ else:
# "extras": two HTML components instead of the save controls.
+ html_info_x = gr.HTML()
+ html_info = gr.HTML()
# The third argument is "txt2img" only for the txt2img tab, otherwise None.
+ parameters_copypaste.bind_buttons(buttons, result_gallery, "txt2img" if tabname == "txt2img" else None)
# The conditional expression applies to the middle tuple element only.
+ return result_gallery, generation_info if tabname != "extras" else html_info_x, html_info
+
def create_ui(wrap_gradio_gpu_call):
import modules.img2img
@@ -675,30 +715,9 @@ def create_ui(wrap_gradio_gpu_call): with gr.Group():
custom_inputs = modules.scripts.scripts_txt2img.setup_ui(is_img2img=False)
- with gr.Column(variant='panel'):
- with gr.Group():
- txt2img_preview = gr.Image(elem_id='txt2img_preview', visible=False)
- txt2img_gallery = gr.Gallery(label='Output', show_label=False, elem_id='txt2img_gallery').style(grid=4)
- with gr.Column():
- with gr.Row():
- save = gr.Button('Save')
- send_to_img2img = gr.Button('Send to img2img')
- send_to_inpaint = gr.Button('Send to inpaint')
- send_to_extras = gr.Button('Send to extras')
- button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder'
- open_txt2img_folder = gr.Button(folder_symbol, elem_id=button_id)
-
- with gr.Row():
- do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False)
-
- with gr.Row():
- download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False)
-
- with gr.Group():
- html_info = gr.HTML()
- generation_info = gr.Textbox(visible=False)
+ txt2img_gallery, generation_info, html_info = create_output_panel("txt2img", opts.outdir_txt2img_samples)
connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False)
connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True)
@@ -756,23 +775,6 @@ def create_ui(wrap_gradio_gpu_call): outputs=[hr_options],
)
- save.click(
- fn=wrap_gradio_call(save_files),
- _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
- inputs=[
- generation_info,
- txt2img_gallery,
- do_make_zip,
- html_info,
- ],
- outputs=[
- download_files,
- html_info,
- html_info,
- html_info,
- ]
- )
-
roll.click(
fn=roll_artist,
_js="update_txt2img_tokens",
@@ -784,8 +786,7 @@ def create_ui(wrap_gradio_gpu_call): ]
)
- global txt2img_paste_fields
- txt2img_paste_fields = [
+ parameters_copypaste.add_paste_fields("txt2img", None, [
(txt2img_prompt, "Prompt"),
(txt2img_negative_prompt, "Negative prompt"),
(steps, "Steps"),
@@ -806,7 +807,7 @@ def create_ui(wrap_gradio_gpu_call): (firstphase_width, "First pass size-1"),
(firstphase_height, "First pass size-2"),
*modules.scripts.scripts_txt2img.infotext_fields
- ]
+ ])
txt2img_preview_params = [
txt2img_prompt,
@@ -893,30 +894,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Group():
custom_inputs = modules.scripts.scripts_img2img.setup_ui(is_img2img=True)
- with gr.Column(variant='panel'):
-
- with gr.Group():
- img2img_preview = gr.Image(elem_id='img2img_preview', visible=False)
- img2img_gallery = gr.Gallery(label='Output', show_label=False, elem_id='img2img_gallery').style(grid=4)
-
- with gr.Column():
- with gr.Row():
- save = gr.Button('Save')
- img2img_send_to_img2img = gr.Button('Send to img2img')
- img2img_send_to_inpaint = gr.Button('Send to inpaint')
- img2img_send_to_extras = gr.Button('Send to extras')
- button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder'
- open_img2img_folder = gr.Button(folder_symbol, elem_id=button_id)
-
- with gr.Row():
- do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False)
-
- with gr.Row():
- download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False)
-
- with gr.Group():
- html_info = gr.HTML()
- generation_info = gr.Textbox(visible=False)
+ img2img_gallery, generation_info, html_info = create_output_panel("img2img", opts.outdir_img2img_samples)
connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False)
connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True)
@@ -1003,25 +981,9 @@ def create_ui(wrap_gradio_gpu_call): fn=interrogate_deepbooru,
inputs=[init_img],
outputs=[img2img_prompt],
- )
-
- save.click(
- fn=wrap_gradio_call(save_files),
- _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
- inputs=[
- generation_info,
- img2img_gallery,
- do_make_zip,
- html_info,
- ],
- outputs=[
- download_files,
- html_info,
- html_info,
- html_info,
- ]
)
+
roll.click(
fn=roll_artist,
_js="update_img2img_tokens",
@@ -1055,7 +1017,8 @@ def create_ui(wrap_gradio_gpu_call): outputs=[prompt, negative_prompt, style1, style2],
)
- global img2img_paste_fields
+ token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter])
+
img2img_paste_fields = [
(img2img_prompt, "Prompt"),
(img2img_negative_prompt, "Negative prompt"),
@@ -1074,7 +1037,9 @@ def create_ui(wrap_gradio_gpu_call): (denoising_strength, "Denoising strength"),
*modules.scripts.scripts_img2img.infotext_fields
]
- token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter])
+ parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields)
+ parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields)
+
with gr.Blocks(analytics_enabled=False) as extras_interface:
with gr.Row().style(equal_height=False):
@@ -1104,9 +1069,9 @@ def create_ui(wrap_gradio_gpu_call): upscaling_resize_w = gr.Number(label="Width", value=512, precision=0)
upscaling_resize_h = gr.Number(label="Height", value=512, precision=0)
upscaling_crop = gr.Checkbox(label='Crop to fit', value=True)
-
+
with gr.Group():
- extras_upscaler_1 = gr.Radio(label='Upscaler 1', elem_id="extras_upscaler_1", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
+ extras_upscaler_1 = gr.Radio(label='Upscaler 1', elem_id="extras_upscaler_1", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
with gr.Group():
extras_upscaler_2 = gr.Radio(label='Upscaler 2', elem_id="extras_upscaler_2", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
@@ -1119,17 +1084,13 @@ def create_ui(wrap_gradio_gpu_call): codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer visibility", value=0, interactive=modules.codeformer_model.have_codeformer)
codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer weight (0 = maximum effect, 1 = minimum effect)", value=0, interactive=modules.codeformer_model.have_codeformer)
+ with gr.Group():
+ upscale_before_face_fix = gr.Checkbox(label='Upscale Before Restoring Faces', value=False)
+
submit = gr.Button('Generate', elem_id="extras_generate", variant='primary')
- with gr.Column(variant='panel'):
- result_images = gr.Gallery(label="Result", show_label=False)
- html_info_x = gr.HTML()
- html_info = gr.HTML()
- extras_send_to_img2img = gr.Button('Send to img2img')
- extras_send_to_inpaint = gr.Button('Send to inpaint')
- button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else ''
- open_extras_folder = gr.Button('Open output directory', elem_id=button_id)
+ result_images, html_info_x, html_info = create_output_panel("extras", opts.outdir_extras_samples)
submit.click(
fn=wrap_gradio_gpu_call(modules.extras.run_extras),
@@ -1152,6 +1113,7 @@ def create_ui(wrap_gradio_gpu_call): extras_upscaler_1,
extras_upscaler_2,
extras_upscaler_2_visibility,
+ upscale_before_face_fix,
],
outputs=[
result_images,
@@ -1159,19 +1121,12 @@ def create_ui(wrap_gradio_gpu_call): html_info,
]
)
+ parameters_copypaste.add_paste_fields("extras", extras_image, None)
- extras_send_to_img2img.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery_img2img",
- inputs=[result_images],
- outputs=[init_img],
- )
- extras_send_to_inpaint.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery_inpaint",
- inputs=[result_images],
- outputs=[init_img_with_mask],
+ extras_image.change(
+ fn=modules.extras.clear_cache,
+ inputs=[], outputs=[]
)
with gr.Blocks(analytics_enabled=False) as pnginfo_interface:
@@ -1183,17 +1138,16 @@ def create_ui(wrap_gradio_gpu_call): html = gr.HTML()
generation_info = gr.Textbox(visible=False)
html2 = gr.HTML()
-
with gr.Row():
- pnginfo_send_to_txt2img = gr.Button('Send to txt2img')
- pnginfo_send_to_img2img = gr.Button('Send to img2img')
+ buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "inpaint", "extras"])
+ parameters_copypaste.bind_buttons(buttons, image, generation_info)
image.change(
fn=wrap_gradio_call(modules.extras.run_pnginfo),
inputs=[image],
outputs=[html, generation_info, html2],
)
-
+
with gr.Blocks() as modelmerger_interface:
with gr.Row().style(equal_height=False):
with gr.Column(variant='panel'):
@@ -1238,7 +1192,8 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_name = gr.Textbox(label="Name")
new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'")
- new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=["linear", "relu", "leakyrelu", "elu", "swish"])
+ new_hypernetwork_activation_func = gr.Dropdown(value="linear", label="Select activation function of hypernetwork", choices=modules.hypernetworks.ui.keys)
+ new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. relu-like - Kaiming, sigmoid-like - Xavier is recommended", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"])
new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization")
new_hypernetwork_use_dropout = gr.Checkbox(label="Use dropout")
overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork")
@@ -1260,6 +1215,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row():
process_flip = gr.Checkbox(label='Create flipped copies')
process_split = gr.Checkbox(label='Split oversized images')
+ process_focal_crop = gr.Checkbox(label='Auto focal point crop')
process_caption = gr.Checkbox(label='Use BLIP for caption')
process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False)
@@ -1267,6 +1223,12 @@ def create_ui(wrap_gradio_gpu_call): process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05)
process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05)
+ with gr.Row(visible=False) as process_focal_crop_row:
+ process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05)
+ process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05)
+ process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05)
+ process_focal_crop_debug = gr.Checkbox(label='Create debug image')
+
with gr.Row():
with gr.Column(scale=3):
gr.HTML(value="")
@@ -1280,6 +1242,12 @@ def create_ui(wrap_gradio_gpu_call): outputs=[process_split_extra_row],
)
+ process_focal_crop.change(
+ fn=lambda show: gr_show(show),
+ inputs=[process_focal_crop],
+ outputs=[process_focal_crop_row],
+ )
+
with gr.Tab(label="Train"):
gr.HTML(value="<p style='margin-bottom: 0.7em'>Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images <a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Textual-Inversion\" style=\"font-weight:bold;\">[wiki]</a></p>")
with gr.Row():
@@ -1342,6 +1310,7 @@ def create_ui(wrap_gradio_gpu_call): overwrite_old_hypernetwork,
new_hypernetwork_layer_structure,
new_hypernetwork_activation_func,
+ new_hypernetwork_initialization_option,
new_hypernetwork_add_layer_norm,
new_hypernetwork_use_dropout
],
@@ -1367,6 +1336,11 @@ def create_ui(wrap_gradio_gpu_call): process_caption_deepbooru,
process_split_threshold,
process_overlap_ratio,
+ process_focal_crop,
+ process_focal_crop_face_weight,
+ process_focal_crop_entropy_weight,
+ process_focal_crop_edges_weight,
+ process_focal_crop_debug,
],
outputs=[
ti_output,
@@ -1471,28 +1445,6 @@ def create_ui(wrap_gradio_gpu_call): script_callbacks.ui_settings_callback()
opts.reorder()
- def open_folder(f):
- if not os.path.exists(f):
- print(f'Folder "{f}" does not exist. After you create an image, the folder will be created.')
- return
- elif not os.path.isdir(f):
- print(f"""
-WARNING
-An open_folder request was made with an argument that is not a folder.
-This could be an error or a malicious attempt to run code on your computer.
-Requested path was: {f}
-""", file=sys.stderr)
- return
-
- if not shared.cmd_opts.hide_ui_dir_config:
- path = os.path.normpath(f)
- if platform.system() == "Windows":
- os.startfile(path)
- elif platform.system() == "Darwin":
- sp.Popen(["open", path])
- else:
- sp.Popen(["xdg-open", path])
-
def run_settings(*args):
changed = 0
@@ -1637,6 +1589,9 @@ Requested path was: {f} if column is not None:
column.__exit__()
+
+
+
interfaces = [
(txt2img_interface, "txt2img", "txt2img"),
(img2img_interface, "img2img", "img2img"),
@@ -1727,68 +1682,7 @@ Requested path was: {f} component_dict['sd_model_checkpoint'],
]
)
- paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration', 'Seed', 'Size-1', 'Size-2']
- txt2img_fields = [field for field,name in txt2img_paste_fields if name in paste_field_names]
- img2img_fields = [field for field,name in img2img_paste_fields if name in paste_field_names]
- send_to_img2img.click(
- fn=lambda img, *args: (image_from_url_text(img),*args),
- _js="(gallery, ...args) => [extract_image_from_gallery_img2img(gallery), ...args]",
- inputs=[txt2img_gallery] + txt2img_fields,
- outputs=[init_img] + img2img_fields,
- )
- send_to_inpaint.click(
- fn=lambda x, *args: (image_from_url_text(x), *args),
- _js="(gallery, ...args) => [extract_image_from_gallery_inpaint(gallery), ...args]",
- inputs=[txt2img_gallery] + txt2img_fields,
- outputs=[init_img_with_mask] + img2img_fields,
- )
-
- img2img_send_to_img2img.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery_img2img",
- inputs=[img2img_gallery],
- outputs=[init_img],
- )
-
- img2img_send_to_inpaint.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery_inpaint",
- inputs=[img2img_gallery],
- outputs=[init_img_with_mask],
- )
-
- send_to_extras.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery_extras",
- inputs=[txt2img_gallery],
- outputs=[extras_image],
- )
-
- open_txt2img_folder.click(
- fn=lambda: open_folder(opts.outdir_samples or opts.outdir_txt2img_samples),
- inputs=[],
- outputs=[],
- )
-
- open_img2img_folder.click(
- fn=lambda: open_folder(opts.outdir_samples or opts.outdir_img2img_samples),
- inputs=[],
- outputs=[],
- )
-
- open_extras_folder.click(
- fn=lambda: open_folder(opts.outdir_samples or opts.outdir_extras_samples),
- inputs=[],
- outputs=[],
- )
-
- img2img_send_to_extras.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery_extras",
- inputs=[img2img_gallery],
- outputs=[extras_image],
- )
settings_map = {
'sd_hypernetwork': 'Hypernet',
@@ -1796,16 +1690,7 @@ Requested path was: {f} 'sd_model_checkpoint': 'Model hash',
}
- settings_paste_fields = [
- (component_dict[k], lambda d, k=k, v=v: apply_setting(k, d.get(v, None)))
- for k, v in settings_map.items()
- ]
-
- modules.generation_parameters_copypaste.connect_paste(txt2img_paste, txt2img_paste_fields + settings_paste_fields, txt2img_prompt)
- modules.generation_parameters_copypaste.connect_paste(img2img_paste, img2img_paste_fields + settings_paste_fields, img2img_prompt)
-
- modules.generation_parameters_copypaste.connect_paste(pnginfo_send_to_txt2img, txt2img_paste_fields + settings_paste_fields, generation_info, 'switch_to_txt2img')
- modules.generation_parameters_copypaste.connect_paste(pnginfo_send_to_img2img, img2img_paste_fields + settings_paste_fields, generation_info, 'switch_to_img2img_img2img')
+ parameters_copypaste.run_bind()
ui_config_file = cmd_opts.ui_config_file
ui_settings = {}
@@ -1885,7 +1770,7 @@ def load_javascript(raw_response): javascript = f'<script>{jsfile.read()}</script>'
scripts_list = modules.scripts.list_scripts("javascript", ".js")
-
+
for basedir, filename, path in scripts_list:
with open(path, "r", encoding="utf8") as jsfile:
javascript += f"\n<!-- {filename} --><script>{jsfile.read()}</script>"
|