From 59a2b9e5afc27d2fda72069ca0635070535d18fe Mon Sep 17 00:00:00 2001 From: Greendayle Date: Wed, 5 Oct 2022 20:50:10 +0200 Subject: deepdanbooru interrogator --- modules/deepbooru.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 modules/deepbooru.py (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py new file mode 100644 index 00000000..958b1c3d --- /dev/null +++ b/modules/deepbooru.py @@ -0,0 +1,60 @@ +import os.path +from concurrent.futures import ProcessPoolExecutor + +import numpy as np +import deepdanbooru as dd +import tensorflow as tf + + +def _load_tf_and_return_tags(pil_image, threshold): + this_folder = os.path.dirname(__file__) + model_path = os.path.join(this_folder, '..', 'models', 'deepbooru', 'deepdanbooru-v3-20211112-sgd-e28') + if not os.path.exists(model_path): + return "Download https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip unpack and put into models/deepbooru" + + tags = dd.project.load_tags_from_project(model_path) + model = dd.project.load_model_from_project( + model_path, compile_model=True + ) + + width = model.input_shape[2] + height = model.input_shape[1] + image = np.array(pil_image) + image = tf.image.resize( + image, + size=(height, width), + method=tf.image.ResizeMethod.AREA, + preserve_aspect_ratio=True, + ) + image = image.numpy() # EagerTensor to np.array + image = dd.image.transform_and_pad_image(image, width, height) + image = image / 255.0 + image_shape = image.shape + image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2])) + + y = model.predict(image)[0] + + result_dict = {} + + for i, tag in enumerate(tags): + result_dict[tag] = y[i] + + + + result_tags_out = [] + result_tags_print = [] + for tag in tags: + if result_dict[tag] >= threshold: + result_tags_out.append(tag) + result_tags_print.append(f'{result_dict[tag]} {tag}') + + print('\n'.join(sorted(result_tags_print, reverse=True))) + + return ', '.join(result_tags_out) + + +def get_deepbooru_tags(pil_image, threshold=0.5): + with ProcessPoolExecutor() as executor: + f = executor.submit(_load_tf_and_return_tags, pil_image, threshold) + ret = f.result() # will rethrow any exceptions + return ret \ No newline at end of file -- cgit v1.2.3 From 1506fab29ad54beb9f52236912abc432209c8089 Mon Sep 17 00:00:00 2001 From: Greendayle Date: Wed, 5 Oct 2022 21:15:08 +0200 Subject: removing problematic tag --- modules/deepbooru.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 958b1c3d..841cb9c5 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -38,13 +38,12 @@ def _load_tf_and_return_tags(pil_image, threshold): for i, tag in enumerate(tags): result_dict[tag] = y[i] - - - result_tags_out = [] result_tags_print = [] for tag in tags: if result_dict[tag] >= threshold: + if tag.startswith("rating:"): + continue result_tags_out.append(tag) result_tags_print.append(f'{result_dict[tag]} {tag}') -- cgit v1.2.3 From 17a99baf0c929e5df4dfc4b2a96aa3890a141112 Mon Sep 17 00:00:00 2001 From: Greendayle Date: Wed, 5 Oct 2022 22:05:24 +0200 Subject: better model search --- modules/deepbooru.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 841cb9c5..a64fd9cd 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -9,8 +9,15 @@ import tensorflow as tf def _load_tf_and_return_tags(pil_image, threshold): this_folder = os.path.dirname(__file__) model_path = os.path.join(this_folder, '..', 'models', 'deepbooru', 'deepdanbooru-v3-20211112-sgd-e28') - if not os.path.exists(model_path): - return "Download https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip unpack and put into models/deepbooru" + + model_good = False + for path_candidate in [model_path, os.path.dirname(model_path)]: + if os.path.exists(os.path.join(path_candidate, 'project.json')): + model_path = path_candidate + model_good = True + if not model_good: + return ("Download https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/" + "deepdanbooru-v3-20211112-sgd-e28.zip unpack and put into models/deepbooru") tags = dd.project.load_tags_from_project(model_path) model = dd.project.load_model_from_project( -- cgit v1.2.3 From 4320f386d9641c7c234589c4cb0c0c6cbeb156ad Mon Sep 17 00:00:00 2001 From: Greendayle Date: Wed, 5 Oct 2022 22:39:32 +0200 Subject: removing underscores and colons --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index a64fd9cd..fb5018a6 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -56,7 +56,7 @@ def _load_tf_and_return_tags(pil_image, threshold): print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out) + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') def get_deepbooru_tags(pil_image, threshold=0.5): -- cgit v1.2.3 From 54fa613c8391e3973cca9d94cdf539061932508b Mon Sep 17 00:00:00 2001 From: Greendayle Date: Fri, 7 Oct 2022 20:37:43 +0200 Subject: loading tf only in interrogation process --- modules/deepbooru.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index fb5018a6..79dc59bd 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,12 +1,13 @@ import os.path from concurrent.futures import ProcessPoolExecutor -import numpy as np -import deepdanbooru as dd -import tensorflow as tf def _load_tf_and_return_tags(pil_image, threshold): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np + this_folder = os.path.dirname(__file__) model_path = os.path.join(this_folder, '..', 'models', 'deepbooru', 'deepdanbooru-v3-20211112-sgd-e28') -- cgit v1.2.3 From fa2ea648db81f5723bb5d722f2fe0ebd7dfc319a Mon Sep 17 00:00:00 2001 From: Greendayle Date: Fri, 7 Oct 2022 20:46:38 +0200 Subject: even more powerfull fix --- modules/deepbooru.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 79dc59bd..60094336 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -60,8 +60,13 @@ def _load_tf_and_return_tags(pil_image, threshold): return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') +def subprocess_init_no_cuda(): + import os + os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + + def get_deepbooru_tags(pil_image, threshold=0.5): - with ProcessPoolExecutor() as executor: - f = executor.submit(_load_tf_and_return_tags, pil_image, threshold) + with ProcessPoolExecutor(initializer=subprocess_init_no_cuda) as executor: + f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, ) ret = f.result() # will rethrow any exceptions return ret \ No newline at end of file -- cgit v1.2.3 From 5f12e7efd92ad802742f96788b4be3249ad02829 Mon Sep 17 00:00:00 2001 From: Greendayle Date: Fri, 7 Oct 2022 20:58:30 +0200 Subject: linux test --- modules/deepbooru.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 60094336..781b2249 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,6 +1,6 @@ import os.path from concurrent.futures import ProcessPoolExecutor - +from multiprocessing import get_context def _load_tf_and_return_tags(pil_image, threshold): @@ -66,7 +66,8 @@ def subprocess_init_no_cuda(): def get_deepbooru_tags(pil_image, threshold=0.5): - with ProcessPoolExecutor(initializer=subprocess_init_no_cuda) as executor: + context = get_context('spawn') + with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor: f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, ) ret = f.result() # will rethrow any exceptions return ret \ No newline at end of file -- cgit v1.2.3 From 01f8cb44474e454903c11718e6a4f33dbde34bb8 Mon Sep 17 00:00:00 2001 From: Greendayle Date: Sat, 8 Oct 2022 18:02:56 +0200 Subject: made deepdanbooru optional, added to readme, automatic download of deepbooru model --- README.md | 2 ++ launch.py | 4 ++++ modules/deepbooru.py | 20 ++++++++++---------- modules/shared.py | 1 + modules/ui.py | 19 ++++++++++++------- requirements.txt | 3 --- requirements_versions.txt | 3 --- 7 files changed, 29 insertions(+), 23 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/README.md b/README.md index ef9b5e31..6cd7a1f9 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - separate prompts using uppercase `AND` - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2` - No token limit for prompts (original stable diffusion lets you use up to 75 tokens) +- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args) ## Installation and Running Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs. @@ -123,4 +124,5 @@ The documentation was moved from this README over to the project's [wiki](https: - Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot - CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator - Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user. +- DeepDanbooru - interrogator for anime diffusors https://github.com/KichangKim/DeepDanbooru - (You) diff --git a/launch.py b/launch.py index 61f62096..d46426eb 100644 --- a/launch.py +++ b/launch.py @@ -33,6 +33,7 @@ def extract_arg(args, name): args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test') xformers = '--xformers' in args +deepdanbooru = '--deepdanbooru' in args def repo_dir(name): @@ -132,6 +133,9 @@ if not is_installed("xformers") and xformers: elif platform.system() == "Linux": run_pip("install xformers", "xformers") +if not is_installed("deepdanbooru") and deepdanbooru: + run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru") + os.makedirs(dir_repos, exist_ok=True) git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 781b2249..7e3c0618 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -9,16 +9,16 @@ def _load_tf_and_return_tags(pil_image, threshold): import numpy as np this_folder = os.path.dirname(__file__) - model_path = os.path.join(this_folder, '..', 'models', 'deepbooru', 'deepdanbooru-v3-20211112-sgd-e28') - - model_good = False - for path_candidate in [model_path, os.path.dirname(model_path)]: - if os.path.exists(os.path.join(path_candidate, 'project.json')): - model_path = path_candidate - model_good = True - if not model_good: - return ("Download https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/" - "deepdanbooru-v3-20211112-sgd-e28.zip unpack and put into models/deepbooru") + model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) + if not os.path.exists(os.path.join(model_path, 'project.json')): + # there is no point importing these every time + import zipfile + from basicsr.utils.download_util import load_file_from_url + load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", + model_path) + with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: + zip_ref.extractall(model_path) + os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) tags = dd.project.load_tags_from_project(model_path) model = dd.project.load_model_from_project( diff --git a/modules/shared.py b/modules/shared.py index 02cb2722..c87b726e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -44,6 +44,7 @@ parser.add_argument("--scunet-models-path", type=str, help="Path to directory wi parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(models_path, 'SwinIR')) parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR')) parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers") +parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator") parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") diff --git a/modules/ui.py b/modules/ui.py index 30583fe9..c5c11c3c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -23,9 +23,10 @@ import gradio.utils import gradio.routes from modules import sd_hijack -from modules.deepbooru import get_deepbooru_tags from modules.paths import script_path from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + from modules.deepbooru import get_deepbooru_tags import modules.shared as shared from modules.sd_samplers import samplers, samplers_for_img2img from modules.sd_hijack import model_hijack @@ -437,7 +438,10 @@ def create_toprow(is_img2img): with gr.Row(scale=1): if is_img2img: interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate") - deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru") + if cmd_opts.deepdanbooru: + deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru") + else: + deepbooru = None else: interrogate = None deepbooru = None @@ -782,11 +786,12 @@ def create_ui(wrap_gradio_gpu_call): outputs=[img2img_prompt], ) - img2img_deepbooru.click( - fn=interrogate_deepbooru, - inputs=[init_img], - outputs=[img2img_prompt], - ) + if cmd_opts.deepdanbooru: + img2img_deepbooru.click( + fn=interrogate_deepbooru, + inputs=[init_img], + outputs=[img2img_prompt], + ) save.click( fn=wrap_gradio_call(save_files), diff --git a/requirements.txt b/requirements.txt index cd3953c6..81641d68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,4 @@ resize-right torchdiffeq kornia lark -deepdanbooru -tensorflow -tensorflow-io functorch diff --git a/requirements_versions.txt b/requirements_versions.txt index 2d256a54..fec3e9d5 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -22,7 +22,4 @@ resize-right==0.0.2 torchdiffeq==0.2.3 kornia==0.6.7 lark==1.1.2 -git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] -tensorflow==2.10.0 -tensorflow-io==0.27.0 functorch==0.2.1 -- cgit v1.2.3 From 1f92336be768d235c18a82acb2195b7135101ae7 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Sun, 9 Oct 2022 23:58:18 -0500 Subject: refactored the deepbooru module to improve speed on running multiple interogations in a row. Added the option to generate deepbooru tags for textual inversion preproccessing. --- modules/deepbooru.py | 84 +++++++++++++++++++++++++-------- modules/textual_inversion/preprocess.py | 22 ++++++++- modules/ui.py | 52 ++++++++++++++------ 3 files changed, 122 insertions(+), 36 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 7e3c0618..cee4a3b4 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,21 +1,74 @@ import os.path from concurrent.futures import ProcessPoolExecutor -from multiprocessing import get_context +import multiprocessing -def _load_tf_and_return_tags(pil_image, threshold): +def get_deepbooru_tags(pil_image, threshold=0.5): + """ + This method is for running only one image at a time for simple use. Used to the img2img interrogate. + """ + from modules import shared # prevents circular reference + create_deepbooru_process(threshold) + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(pil_image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + release_process() + return ret + + +def deepbooru_process(queue, deepbooru_process_return, threshold): + model, tags = get_deepbooru_tags_model() + while True: # while process is running, keep monitoring queue for new image + pil_image = queue.get() + if pil_image == "QUIT": + break + else: + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold) + + +def create_deepbooru_process(threshold=0.5): + """ + Creates deepbooru process. A queue is created to send images into the process. This enables multiple images + to be processed in a row without reloading the model or creating a new process. To return the data, a shared + dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned + to the dictionary and the method adding the image to the queue should wait for this value to be updated with + the tags. + """ + from modules import shared # prevents circular reference + shared.deepbooru_process_manager = multiprocessing.Manager() + shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() + shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold)) + shared.deepbooru_process.start() + + +def release_process(): + """ + Stops the deepbooru process to return used memory + """ + from modules import shared # prevents circular reference + shared.deepbooru_process_queue.put("QUIT") + shared.deepbooru_process.join() + shared.deepbooru_process_queue = None + shared.deepbooru_process = None + shared.deepbooru_process_return = None + shared.deepbooru_process_manager = None + +def get_deepbooru_tags_model(): import deepdanbooru as dd import tensorflow as tf import numpy as np - this_folder = os.path.dirname(__file__) model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) if not os.path.exists(os.path.join(model_path, 'project.json')): # there is no point importing these every time import zipfile from basicsr.utils.download_util import load_file_from_url - load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", - model_path) + load_file_from_url( + r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", + model_path) with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: zip_ref.extractall(model_path) os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) @@ -24,7 +77,13 @@ def _load_tf_and_return_tags(pil_image, threshold): model = dd.project.load_model_from_project( model_path, compile_model=True ) + return model, tags + +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np width = model.input_shape[2] height = model.input_shape[1] image = np.array(pil_image) @@ -57,17 +116,4 @@ def _load_tf_and_return_tags(pil_image, threshold): print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') - - -def subprocess_init_no_cuda(): - import os - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - - -def get_deepbooru_tags(pil_image, threshold=0.5): - context = get_context('spawn') - with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor: - f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, ) - ret = f.result() # will rethrow any exceptions - return ret \ No newline at end of file + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') \ No newline at end of file diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..9f63c9a4 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -3,11 +3,14 @@ from PIL import Image, ImageOps import platform import sys import tqdm +import time from modules import shared, images +from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + import modules.deepbooru as deepbooru - -def preprocess(process_src, process_dst, process_flip, process_split, process_caption): +def preprocess(process_src, process_dst, process_flip, process_split, process_caption, process_caption_deepbooru=False): size = 512 src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -24,10 +27,21 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.load() + if process_caption_deepbooru: + deepbooru.create_deepbooru_process() + def save_pic_with_caption(image, index): if process_caption: caption = "-" + shared.interrogator.generate_caption(image) caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + elif process_caption_deepbooru: + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = "-" + shared.deepbooru_process_return["value"] + caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + shared.deepbooru_process_return["value"] = -1 else: caption = filename caption = os.path.splitext(caption)[0] @@ -79,6 +93,10 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.send_blip_to_ram() + if process_caption_deepbooru: + deepbooru.release_process() + + def sanitize_caption(base_path, original_caption, suffix): operating_system = platform.system().lower() if (operating_system == "windows"): diff --git a/modules/ui.py b/modules/ui.py index 2231a8ed..179e3a83 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1034,6 +1034,9 @@ def create_ui(wrap_gradio_gpu_call): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images into two') process_caption = gr.Checkbox(label='Use BLIP caption as filename') + if cmd_opts.deepdanbooru: + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') + with gr.Row(): with gr.Column(scale=3): @@ -1086,21 +1089,40 @@ def create_ui(wrap_gradio_gpu_call): ] ) - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) + if cmd_opts.deepdanbooru: + # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + process_caption_deepbooru, + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) + else: + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) train_embedding.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]), -- cgit v1.2.3 From a1a05ad2d13d0b995dbf8ecead6315f17837ef81 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 16:47:58 -0500 Subject: import time missing, added to deepbooru fixxing error on get_deepbooru_tags --- modules/deepbooru.py | 1 + 1 file changed, 1 insertion(+) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index cee4a3b4..12555b2e 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,6 +1,7 @@ import os.path from concurrent.futures import ProcessPoolExecutor import multiprocessing +import time def get_deepbooru_tags(pil_image, threshold=0.5): -- cgit v1.2.3 From b980e7188c671fc55b26557f097076fb5c976ba0 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 16:52:54 -0500 Subject: corrected tag return in get_deepbooru_tags --- modules/deepbooru.py | 1 - 1 file changed, 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 12555b2e..ebdba5e0 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -15,7 +15,6 @@ def get_deepbooru_tags(pil_image, threshold=0.5): while shared.deepbooru_process_return["value"] == -1: time.sleep(0.2) release_process() - return ret def deepbooru_process(queue, deepbooru_process_return, threshold): -- cgit v1.2.3 From 76ef3d75f61253516c024553335d9083d9660a8a Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 18:01:49 -0500 Subject: added deepbooru settings (threshold and sort by alpha or likelyhood) --- modules/deepbooru.py | 36 +++++++++++++++++++++++++----------- modules/shared.py | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index ebdba5e0..e31e92c0 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -3,31 +3,32 @@ from concurrent.futures import ProcessPoolExecutor import multiprocessing import time - -def get_deepbooru_tags(pil_image, threshold=0.5): +def get_deepbooru_tags(pil_image): """ This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(threshold) + create_deepbooru_process(shared.opts.deepbooru_threshold, shared.opts.deepbooru_sort_alpha) shared.deepbooru_process_return["value"] = -1 shared.deepbooru_process_queue.put(pil_image) while shared.deepbooru_process_return["value"] == -1: time.sleep(0.2) + tags = shared.deepbooru_process_return["value"] release_process() + return tags -def deepbooru_process(queue, deepbooru_process_return, threshold): +def deepbooru_process(queue, deepbooru_process_return, threshold, alpha_sort): model, tags = get_deepbooru_tags_model() while True: # while process is running, keep monitoring queue for new image pil_image = queue.get() if pil_image == "QUIT": break else: - deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold) + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) -def create_deepbooru_process(threshold=0.5): +def create_deepbooru_process(threshold, alpha_sort): """ Creates deepbooru process. A queue is created to send images into the process. This enables multiple images to be processed in a row without reloading the model or creating a new process. To return the data, a shared @@ -40,7 +41,7 @@ def create_deepbooru_process(threshold=0.5): shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold)) + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, alpha_sort)) shared.deepbooru_process.start() @@ -80,7 +81,7 @@ def get_deepbooru_tags_model(): return model, tags -def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort): import deepdanbooru as dd import tensorflow as tf import numpy as np @@ -105,15 +106,28 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): for i, tag in enumerate(tags): result_dict[tag] = y[i] - result_tags_out = [] + + unsorted_tags_in_theshold = [] result_tags_print = [] for tag in tags: if result_dict[tag] >= threshold: if tag.startswith("rating:"): continue - result_tags_out.append(tag) + unsorted_tags_in_theshold.append((result_dict[tag], tag)) result_tags_print.append(f'{result_dict[tag]} {tag}') + # sort tags + result_tags_out = [] + sort_ndx = 0 + print(alpha_sort) + if alpha_sort: + sort_ndx = 1 + + # sort by reverse by likelihood and normal for alpha + unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) + for weight, tag in unsorted_tags_in_theshold: + result_tags_out.append(tag) + print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') \ No newline at end of file + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') diff --git a/modules/shared.py b/modules/shared.py index 1995a99a..2e307809 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -261,6 +261,12 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 's_noise': OptionInfo(1.0, "sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), })) +if cmd_opts.deepdanbooru: + options_templates.update(options_section(('deepbooru-params', "DeepBooru parameters"), { + "deepbooru_sort_alpha": OptionInfo(True, "Sort Alphabetical", gr.Checkbox), + 'deepbooru_threshold': OptionInfo(0.5, "Threshold", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + })) + class Options: data = None -- cgit v1.2.3 From ff4ef13dd591ec52f196f344f47537695df95364 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 10:24:27 -0500 Subject: removed unneeded print --- modules/deepbooru.py | 1 - 1 file changed, 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index e31e92c0..89dcac3c 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -119,7 +119,6 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) # sort tags result_tags_out = [] sort_ndx = 0 - print(alpha_sort) if alpha_sort: sort_ndx = 1 -- cgit v1.2.3 From f53f703aebc801c4204182d52bb1e0bef9808e1f Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 18:12:12 -0500 Subject: resolved conflicts, moved settings under interrogate section, settings only show if deepbooru flag is enabled --- modules/deepbooru.py | 2 +- modules/shared.py | 19 +++++++++---------- modules/textual_inversion/preprocess.py | 2 +- modules/ui.py | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 89dcac3c..29529949 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -8,7 +8,7 @@ def get_deepbooru_tags(pil_image): This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(shared.opts.deepbooru_threshold, shared.opts.deepbooru_sort_alpha) + create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, shared.opts.deepbooru_sort_alpha) shared.deepbooru_process_return["value"] = -1 shared.deepbooru_process_queue.put(pil_image) while shared.deepbooru_process_return["value"] == -1: diff --git a/modules/shared.py b/modules/shared.py index 817203f8..5456c477 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -248,15 +248,20 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}), })) -options_templates.update(options_section(('interrogate', "Interrogate Options"), { +interrogate_option_dictionary = { "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"), "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"), "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}), "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), - "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)"), - "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), -})) + "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)") +} + +if cmd_opts.deepdanbooru: + interrogate_option_dictionary["interrogate_deepbooru_score_threshold"] = OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}) + interrogate_option_dictionary["deepbooru_sort_alpha"] = OptionInfo(True, "Interrogate: deepbooru sort alphabetically", gr.Checkbox) + +options_templates.update(options_section(('interrogate', "Interrogate Options"), interrogate_option_dictionary)) options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, "Show progressbar"), @@ -282,12 +287,6 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 'eta_noise_seed_delta': OptionInfo(0, "Eta noise seed delta", gr.Number, {"precision": 0}), })) -if cmd_opts.deepdanbooru: - options_templates.update(options_section(('deepbooru-params', "DeepBooru parameters"), { - "deepbooru_sort_alpha": OptionInfo(True, "Sort Alphabetical", gr.Checkbox), - 'deepbooru_threshold': OptionInfo(0.5, "Threshold", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), - })) - class Options: data = None diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index a96388d6..113cecf1 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: diff --git a/modules/ui.py b/modules/ui.py index 2891fc8c..fa45edca 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -317,7 +317,7 @@ def interrogate(image): def interrogate_deepbooru(image): - prompt = get_deepbooru_tags(image, opts.interrogate_deepbooru_score_threshold) + prompt = get_deepbooru_tags(image) return gr_show(True) if prompt is None else prompt -- cgit v1.2.3 From 698d303b04e293635bfb49c525409f3bcf671dce Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 21:55:43 +0300 Subject: deepbooru: added option to use spaces or underscores deepbooru: added option to quote (\) in tags deepbooru/BLIP: write caption to file instead of image filename deepbooru/BLIP: now possible to use both for captions deepbooru: process is stopped even if an exception occurs --- modules/deepbooru.py | 65 ++++++++++++++++++----- modules/shared.py | 2 + modules/textual_inversion/preprocess.py | 92 ++++++++++++++------------------- modules/ui.py | 7 +-- 4 files changed, 95 insertions(+), 71 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 29529949..419e6a9c 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -2,33 +2,44 @@ import os.path from concurrent.futures import ProcessPoolExecutor import multiprocessing import time +import re + +re_special = re.compile(r'([\\()])') def get_deepbooru_tags(pil_image): """ This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, shared.opts.deepbooru_sort_alpha) - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process_queue.put(pil_image) - while shared.deepbooru_process_return["value"] == -1: - time.sleep(0.2) - tags = shared.deepbooru_process_return["value"] - release_process() - return tags + try: + create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts()) + return get_tags_from_process(pil_image) + finally: + release_process() + + +def create_deepbooru_opts(): + from modules import shared -def deepbooru_process(queue, deepbooru_process_return, threshold, alpha_sort): + return { + "use_spaces": shared.opts.deepbooru_use_spaces, + "use_escape": shared.opts.deepbooru_escape, + "alpha_sort": shared.opts.deepbooru_sort_alpha, + } + + +def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts): model, tags = get_deepbooru_tags_model() while True: # while process is running, keep monitoring queue for new image pil_image = queue.get() if pil_image == "QUIT": break else: - deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts) -def create_deepbooru_process(threshold, alpha_sort): +def create_deepbooru_process(threshold, deepbooru_opts): """ Creates deepbooru process. A queue is created to send images into the process. This enables multiple images to be processed in a row without reloading the model or creating a new process. To return the data, a shared @@ -41,10 +52,23 @@ def create_deepbooru_process(threshold, alpha_sort): shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, alpha_sort)) + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts)) shared.deepbooru_process.start() +def get_tags_from_process(image): + from modules import shared + + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = shared.deepbooru_process_return["value"] + shared.deepbooru_process_return["value"] = -1 + + return caption + + def release_process(): """ Stops the deepbooru process to return used memory @@ -81,10 +105,15 @@ def get_deepbooru_tags_model(): return model, tags -def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort): +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts): import deepdanbooru as dd import tensorflow as tf import numpy as np + + alpha_sort = deepbooru_opts['alpha_sort'] + use_spaces = deepbooru_opts['use_spaces'] + use_escape = deepbooru_opts['use_escape'] + width = model.input_shape[2] height = model.input_shape[1] image = np.array(pil_image) @@ -129,4 +158,12 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') + tags_text = ', '.join(result_tags_out) + + if use_spaces: + tags_text = tags_text.replace('_', ' ') + + if use_escape: + tags_text = re.sub(re_special, r'\\\1', tags_text) + + return tags_text.replace(':', ' ') diff --git a/modules/shared.py b/modules/shared.py index e64e69fc..78b73aae 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -260,6 +260,8 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), "deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"), + "deepbooru_use_spaces": OptionInfo(False, "use spaces for tags in deepbooru"), + "deepbooru_escape": OptionInfo(True, "escape (\\) brackets in deepbooru (so they are used as literal brackets and not for emphasis)"), })) options_templates.update(options_section(('ui', "User interface"), { diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 113cecf1..3047bede 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -10,7 +10,28 @@ from modules.shared import opts, cmd_opts if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru + def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): + try: + if process_caption: + shared.interrogator.load() + + if process_caption_deepbooru: + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, deepbooru.create_deepbooru_opts()) + + preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) + + finally: + + if process_caption: + shared.interrogator.send_blip_to_ram() + + if process_caption_deepbooru: + deepbooru.release_process() + + + +def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): width = process_width height = process_height src = os.path.abspath(process_src) @@ -25,30 +46,28 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) - if process_caption: - shared.interrogator.load() - - if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) - def save_pic_with_caption(image, index): + caption = "" + if process_caption: - caption = "-" + shared.interrogator.generate_caption(image) - caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") - elif process_caption_deepbooru: - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process_queue.put(image) - while shared.deepbooru_process_return["value"] == -1: - time.sleep(0.2) - caption = "-" + shared.deepbooru_process_return["value"] - caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") - shared.deepbooru_process_return["value"] = -1 - else: - caption = filename - caption = os.path.splitext(caption)[0] - caption = os.path.basename(caption) + caption += shared.interrogator.generate_caption(image) + + if process_caption_deepbooru: + if len(caption) > 0: + caption += ", " + caption += deepbooru.get_tags_from_process(image) + + filename_part = filename + filename_part = os.path.splitext(filename_part)[0] + filename_part = os.path.basename(filename_part) + + basename = f"{index:05}-{subindex[0]}-{filename_part}" + image.save(os.path.join(dst, f"{basename}.png")) + + if len(caption) > 0: + with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file: + file.write(caption) - image.save(os.path.join(dst, f"{index:05}-{subindex[0]}{caption}.png")) subindex[0] += 1 def save_pic(image, index): @@ -93,34 +112,3 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ save_pic(img, index) shared.state.nextjob() - - if process_caption: - shared.interrogator.send_blip_to_ram() - - if process_caption_deepbooru: - deepbooru.release_process() - - -def sanitize_caption(base_path, original_caption, suffix): - operating_system = platform.system().lower() - if (operating_system == "windows"): - invalid_path_characters = "\\/:*?\"<>|" - max_path_length = 259 - else: - invalid_path_characters = "/" #linux/macos - max_path_length = 1023 - caption = original_caption - for invalid_character in invalid_path_characters: - caption = caption.replace(invalid_character, "") - fixed_path_length = len(base_path) + len(suffix) - if fixed_path_length + len(caption) <= max_path_length: - return caption - caption_tokens = caption.split() - new_caption = "" - for token in caption_tokens: - last_caption = new_caption - new_caption = new_caption + token + " " - if (len(new_caption) + fixed_path_length - 1 > max_path_length): - break - print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr) - return last_caption.strip() diff --git a/modules/ui.py b/modules/ui.py index c42535c8..e07ee0e1 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1074,11 +1074,8 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images into two') - process_caption = gr.Checkbox(label='Use BLIP caption as filename') - if cmd_opts.deepdanbooru: - process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') - else: - process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename', visible=False) + process_caption = gr.Checkbox(label='Use BLIP for caption') + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) with gr.Row(): with gr.Column(scale=3): -- cgit v1.2.3 From efefa4862c6c75115d3da9f768348630cc32bdea Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Wed, 12 Oct 2022 13:03:00 -0700 Subject: [1/?] [wip] Reintroduce opts.interrogate_return_ranks looks functionally correct, needs testing Needs particular testing care around whether the colon usage (:) will break anything in whatever new use cases were introduced by https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/2143 --- modules/deepbooru.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 419e6a9c..2cbf2cab 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -26,6 +26,7 @@ def create_deepbooru_opts(): "use_spaces": shared.opts.deepbooru_use_spaces, "use_escape": shared.opts.deepbooru_escape, "alpha_sort": shared.opts.deepbooru_sort_alpha, + "include_ranks": shared.opts.interrogate_return_ranks, } @@ -113,6 +114,7 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o alpha_sort = deepbooru_opts['alpha_sort'] use_spaces = deepbooru_opts['use_spaces'] use_escape = deepbooru_opts['use_escape'] + include_ranks = deepbooru_opts['include_ranks'] width = model.input_shape[2] height = model.input_shape[1] @@ -151,19 +153,20 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o if alpha_sort: sort_ndx = 1 - # sort by reverse by likelihood and normal for alpha + # sort by reverse by likelihood and normal for alpha, and format tag text as requested unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) for weight, tag in unsorted_tags_in_theshold: - result_tags_out.append(tag) + # note: tag_outformat will still have a colon if include_ranks is True + tag_outformat = tag.replace(':', ' ') + if use_spaces: + tag_outformat = tag_outformat.replace('_', ' ') + if use_escape: + tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) + if include_ranks: + use_escape += f":{weight:.3f}" - print('\n'.join(sorted(result_tags_print, reverse=True))) - - tags_text = ', '.join(result_tags_out) + result_tags_out.append(tag_outformat) - if use_spaces: - tags_text = tags_text.replace('_', ' ') - - if use_escape: - tags_text = re.sub(re_special, r'\\\1', tags_text) + print('\n'.join(sorted(result_tags_print, reverse=True))) - return tags_text.replace(':', ' ') + return ', '.join(result_tags_out) -- cgit v1.2.3 From f776254b12361b5bae16f6629bcdcb47b450c48d Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Wed, 12 Oct 2022 13:08:06 -0700 Subject: [2/?] [wip] ignore OPT_INCLUDE_RANKS for training filenames --- modules/deepbooru.py | 3 ++- modules/textual_inversion/preprocess.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 2cbf2cab..fcc05819 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -19,6 +19,7 @@ def get_deepbooru_tags(pil_image): release_process() +OPT_INCLUDE_RANKS = "include_ranks" def create_deepbooru_opts(): from modules import shared @@ -26,7 +27,7 @@ def create_deepbooru_opts(): "use_spaces": shared.opts.deepbooru_use_spaces, "use_escape": shared.opts.deepbooru_escape, "alpha_sort": shared.opts.deepbooru_sort_alpha, - "include_ranks": shared.opts.interrogate_return_ranks, + OPT_INCLUDE_RANKS: shared.opts.interrogate_return_ranks, } diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 3047bede..886cf0c3 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -17,7 +17,9 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, deepbooru.create_deepbooru_opts()) + db_opts = deepbooru.create_deepbooru_opts() + db_opts[deepbooru.OPT_INCLUDE_RANKS] = False + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) -- cgit v1.2.3 From 514456101b142b47acf87f6de95bad1a23d73be7 Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Wed, 12 Oct 2022 13:14:13 -0700 Subject: [3/?] [wip] fix incorrect variable reference still needs testing --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index fcc05819..c2004696 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -164,7 +164,7 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o if use_escape: tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) if include_ranks: - use_escape += f":{weight:.3f}" + tag_outformat += f":{weight:.3f}" result_tags_out.append(tag_outformat) -- cgit v1.2.3 From 04c0e643f2eec68d93a76db171b4d70595808702 Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Wed, 12 Oct 2022 22:13:53 -0700 Subject: Merge branch 'master' of https://github.com/HunterVacui/stable-diffusion-webui --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index c2004696..f34f3788 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -164,7 +164,7 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o if use_escape: tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) if include_ranks: - tag_outformat += f":{weight:.3f}" + tag_outformat = f"({tag_outformat}:{weight:.3f})" result_tags_out.append(tag_outformat) -- cgit v1.2.3 From c24df4b486a48c60f48276f7760a9acb4a13e22d Mon Sep 17 00:00:00 2001 From: CookieHCl Date: Sat, 15 Oct 2022 03:26:36 +0900 Subject: Disable compiling deepbooru model This is only necessary when you have to train, and compiling model produces warning. --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index f34f3788..4ad334a1 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -102,7 +102,7 @@ def get_deepbooru_tags_model(): tags = dd.project.load_tags_from_project(model_path) model = dd.project.load_model_from_project( - model_path, compile_model=True + model_path, compile_model=False ) return model, tags -- cgit v1.2.3 From d62ef76614624cda99d842a2900242d5b7923eda Mon Sep 17 00:00:00 2001 From: guaneec Date: Tue, 18 Oct 2022 03:09:50 +0800 Subject: Don't eat colons in booru tags --- modules/deepbooru.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 4ad334a1..de16b13f 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -157,8 +157,6 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o # sort by reverse by likelihood and normal for alpha, and format tag text as requested unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) for weight, tag in unsorted_tags_in_theshold: - # note: tag_outformat will still have a colon if include_ranks is True - tag_outformat = tag.replace(':', ' ') if use_spaces: tag_outformat = tag_outformat.replace('_', ' ') if use_escape: -- cgit v1.2.3 From 2e28c841f438b2090caac2b9a54eb62ddbda837c Mon Sep 17 00:00:00 2001 From: guaneec Date: Tue, 18 Oct 2022 03:15:41 +0800 Subject: Oops --- modules/deepbooru.py | 1 + 1 file changed, 1 insertion(+) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index de16b13f..8914662d 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -157,6 +157,7 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o # sort by reverse by likelihood and normal for alpha, and format tag text as requested unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) for weight, tag in unsorted_tags_in_theshold: + tag_outformat = tag if use_spaces: tag_outformat = tag_outformat.replace('_', ' ') if use_escape: -- cgit v1.2.3 From 72383abacdc6a101704a6f73758ce4d0bb68c9d1 Mon Sep 17 00:00:00 2001 From: Greendayle Date: Sat, 22 Oct 2022 16:50:07 +0200 Subject: Deepdanbooru linux fix --- modules/deepbooru.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 8914662d..3c34ab7c 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -50,7 +50,8 @@ def create_deepbooru_process(threshold, deepbooru_opts): the tags. """ from modules import shared # prevents circular reference - shared.deepbooru_process_manager = multiprocessing.Manager() + context = multiprocessing.get_context("spawn") + shared.deepbooru_process_manager = context.Manager() shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 -- cgit v1.2.3 From e38625011cd4955da4bc67fe95d1d0f4c0c53899 Mon Sep 17 00:00:00 2001 From: Greendayle Date: Sat, 22 Oct 2022 16:56:52 +0200 Subject: fix part2 --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 3c34ab7c..8bbc90a4 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -55,7 +55,7 @@ def create_deepbooru_process(threshold, deepbooru_opts): shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts)) + shared.deepbooru_process = context.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts)) shared.deepbooru_process.start() -- cgit v1.2.3 From c81d440d876dfd2ab3560410f37442ef56fc6632 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 20 Nov 2022 16:39:20 +0300 Subject: moved deepdanbooru to pure pytorch implementation --- README.md | 2 +- launch.py | 5 - modules/api/api.py | 10 +- modules/deepbooru.py | 258 +++++------- modules/deepbooru_model.py | 676 ++++++++++++++++++++++++++++++++ modules/shared.py | 2 +- modules/textual_inversion/preprocess.py | 12 +- modules/ui.py | 7 +- 8 files changed, 777 insertions(+), 195 deletions(-) create mode 100644 modules/deepbooru_model.py (limited to 'modules/deepbooru.py') diff --git a/README.md b/README.md index 33508f31..5f5ab3aa 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - separate prompts using uppercase `AND` - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2` - No token limit for prompts (original stable diffusion lets you use up to 75 tokens) -- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args) +- DeepDanbooru integration, creates danbooru style tags for anime prompts - [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args) - via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI - Generate forever option diff --git a/launch.py b/launch.py index 0f84b5d1..d2f1055c 100644 --- a/launch.py +++ b/launch.py @@ -134,7 +134,6 @@ def prepare_enviroment(): gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379") clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1") - deepdanbooru_package = os.environ.get('DEEPDANBOORU_PACKAGE', "git+https://github.com/KichangKim/DeepDanbooru.git@d91a2963bf87c6a770d74894667e9ffa9f6de7ff") xformers_windows_package = os.environ.get('XFORMERS_WINDOWS_PACKAGE', 'https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl') @@ -158,7 +157,6 @@ def prepare_enviroment(): sys.argv, update_check = extract_arg(sys.argv, '--update-check') sys.argv, run_tests = extract_arg(sys.argv, '--tests') xformers = '--xformers' in sys.argv - deepdanbooru = '--deepdanbooru' in sys.argv ngrok = '--ngrok' in sys.argv try: @@ -193,9 +191,6 @@ def prepare_enviroment(): elif platform.system() == "Linux": run_pip("install xformers", "xformers") - if not is_installed("deepdanbooru") and deepdanbooru: - run_pip(f"install {deepdanbooru_package}#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru") - if not is_installed("pyngrok") and ngrok: run_pip("install pyngrok", "ngrok") diff --git a/modules/api/api.py b/modules/api/api.py index 79b2c818..7a567be3 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -9,7 +9,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials from secrets import compare_digest import modules.shared as shared -from modules import sd_samplers +from modules import sd_samplers, deepbooru from modules.api.models import * from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images from modules.extras import run_extras, run_pnginfo @@ -18,9 +18,6 @@ from modules.sd_models import checkpoints_list from modules.realesrgan_model import get_realesrgan_models from typing import List -if shared.cmd_opts.deepdanbooru: - from modules.deepbooru import get_deepbooru_tags - def upscaler_to_index(name: str): try: return [x.name.lower() for x in shared.sd_upscalers].index(name.lower()) @@ -245,10 +242,7 @@ class Api: if interrogatereq.model == "clip": processed = shared.interrogator.interrogate(img) elif interrogatereq.model == "deepdanbooru": - if shared.cmd_opts.deepdanbooru: - processed = get_deepbooru_tags(img) - else: - raise HTTPException(status_code=404, detail="Model not found. Add --deepdanbooru when launching for using the model.") + processed = deepbooru.model.tag(img) else: raise HTTPException(status_code=404, detail="Model not found") diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 8bbc90a4..b9066d81 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,173 +1,97 @@ -import os.path -from concurrent.futures import ProcessPoolExecutor -import multiprocessing -import time +import os import re +import torch +from PIL import Image +import numpy as np + +from modules import modelloader, paths, deepbooru_model, devices, images, shared + re_special = re.compile(r'([\\()])') -def get_deepbooru_tags(pil_image): - """ - This method is for running only one image at a time for simple use. Used to the img2img interrogate. - """ - from modules import shared # prevents circular reference - - try: - create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts()) - return get_tags_from_process(pil_image) - finally: - release_process() - - -OPT_INCLUDE_RANKS = "include_ranks" -def create_deepbooru_opts(): - from modules import shared - - return { - "use_spaces": shared.opts.deepbooru_use_spaces, - "use_escape": shared.opts.deepbooru_escape, - "alpha_sort": shared.opts.deepbooru_sort_alpha, - OPT_INCLUDE_RANKS: shared.opts.interrogate_return_ranks, - } - - -def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts): - model, tags = get_deepbooru_tags_model() - while True: # while process is running, keep monitoring queue for new image - pil_image = queue.get() - if pil_image == "QUIT": - break - else: - deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts) - - -def create_deepbooru_process(threshold, deepbooru_opts): - """ - Creates deepbooru process. A queue is created to send images into the process. This enables multiple images - to be processed in a row without reloading the model or creating a new process. To return the data, a shared - dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned - to the dictionary and the method adding the image to the queue should wait for this value to be updated with - the tags. - """ - from modules import shared # prevents circular reference - context = multiprocessing.get_context("spawn") - shared.deepbooru_process_manager = context.Manager() - shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() - shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = context.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts)) - shared.deepbooru_process.start() - - -def get_tags_from_process(image): - from modules import shared - - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process_queue.put(image) - while shared.deepbooru_process_return["value"] == -1: - time.sleep(0.2) - caption = shared.deepbooru_process_return["value"] - shared.deepbooru_process_return["value"] = -1 - - return caption - - -def release_process(): - """ - Stops the deepbooru process to return used memory - """ - from modules import shared # prevents circular reference - shared.deepbooru_process_queue.put("QUIT") - shared.deepbooru_process.join() - shared.deepbooru_process_queue = None - shared.deepbooru_process = None - shared.deepbooru_process_return = None - shared.deepbooru_process_manager = None - -def get_deepbooru_tags_model(): - import deepdanbooru as dd - import tensorflow as tf - import numpy as np - this_folder = os.path.dirname(__file__) - model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) - if not os.path.exists(os.path.join(model_path, 'project.json')): - # there is no point importing these every time - import zipfile - from basicsr.utils.download_util import load_file_from_url - load_file_from_url( - r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", - model_path) - with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: - zip_ref.extractall(model_path) - os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) - - tags = dd.project.load_tags_from_project(model_path) - model = dd.project.load_model_from_project( - model_path, compile_model=False - ) - return model, tags - - -def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts): - import deepdanbooru as dd - import tensorflow as tf - import numpy as np - - alpha_sort = deepbooru_opts['alpha_sort'] - use_spaces = deepbooru_opts['use_spaces'] - use_escape = deepbooru_opts['use_escape'] - include_ranks = deepbooru_opts['include_ranks'] - - width = model.input_shape[2] - height = model.input_shape[1] - image = np.array(pil_image) - image = tf.image.resize( - image, - size=(height, width), - method=tf.image.ResizeMethod.AREA, - preserve_aspect_ratio=True, - ) - image = image.numpy() # EagerTensor to np.array - image = dd.image.transform_and_pad_image(image, width, height) - image = image / 255.0 - image_shape = image.shape - image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2])) - - y = model.predict(image)[0] - - result_dict = {} - - for i, tag in enumerate(tags): - result_dict[tag] = y[i] - - unsorted_tags_in_theshold = [] - result_tags_print = [] - for tag in tags: - if result_dict[tag] >= threshold: + +class DeepDanbooru: + def __init__(self): + self.model = None + + def load(self): + if self.model is not None: + return + + files = modelloader.load_models( + model_path=os.path.join(paths.models_path, "torch_deepdanbooru"), + model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt', + ext_filter=".pt", + download_name='model-resnet_custom_v3.pt', + ) + + self.model = deepbooru_model.DeepDanbooruModel() + self.model.load_state_dict(torch.load(files[0], map_location="cpu")) + + self.model.eval() + self.model.to(devices.cpu, devices.dtype) + + def start(self): + self.load() + self.model.to(devices.device) + + def stop(self): + if not shared.opts.interrogate_keep_models_in_memory: + self.model.to(devices.cpu) + devices.torch_gc() + + def tag(self, pil_image): + self.start() + res = self.tag_multi(pil_image) + self.stop() + + return res + + def tag_multi(self, pil_image, force_disable_ranks=False): + threshold = shared.opts.interrogate_deepbooru_score_threshold + use_spaces = shared.opts.deepbooru_use_spaces + use_escape = shared.opts.deepbooru_escape + alpha_sort = shared.opts.deepbooru_sort_alpha + include_ranks = shared.opts.interrogate_return_ranks and not force_disable_ranks + + pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512) + a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255 + + with torch.no_grad(), devices.autocast(): + x = torch.from_numpy(a).cuda() + y = self.model(x)[0].detach().cpu().numpy() + + probability_dict = {} + + for tag, probability in zip(self.model.tags, y): + if probability < threshold: + continue + if tag.startswith("rating:"): continue - unsorted_tags_in_theshold.append((result_dict[tag], tag)) - result_tags_print.append(f'{result_dict[tag]} {tag}') - - # sort tags - result_tags_out = [] - sort_ndx = 0 - if alpha_sort: - sort_ndx = 1 - - # sort by reverse by likelihood and normal for alpha, and format tag text as requested - unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) - for weight, tag in unsorted_tags_in_theshold: - tag_outformat = tag - if use_spaces: - tag_outformat = tag_outformat.replace('_', ' ') - if use_escape: - tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) - if include_ranks: - tag_outformat = f"({tag_outformat}:{weight:.3f})" - - result_tags_out.append(tag_outformat) - - print('\n'.join(sorted(result_tags_print, reverse=True))) - - return ', '.join(result_tags_out) + + probability_dict[tag] = probability + + if alpha_sort: + tags = sorted(probability_dict) + else: + tags = [tag for tag, _ in sorted(probability_dict.items(), key=lambda x: -x[1])] + + res = [] + + for tag in tags: + probability = probability_dict[tag] + tag_outformat = tag + if use_spaces: + tag_outformat = tag_outformat.replace('_', ' ') + if use_escape: + tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) + if include_ranks: + tag_outformat = f"({tag_outformat}:{probability:.3f})" + + res.append(tag_outformat) + + return ", ".join(res) + + +model = DeepDanbooru() diff --git a/modules/deepbooru_model.py b/modules/deepbooru_model.py new file mode 100644 index 00000000..edd40c81 --- /dev/null +++ b/modules/deepbooru_model.py @@ -0,0 +1,676 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +# see https://github.com/AUTOMATIC1111/TorchDeepDanbooru for more + + +class DeepDanbooruModel(nn.Module): + def __init__(self): + super(DeepDanbooruModel, self).__init__() + + self.tags = [] + + self.n_Conv_0 = nn.Conv2d(kernel_size=(7, 7), in_channels=3, out_channels=64, stride=(2, 2)) + self.n_MaxPool_0 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)) + self.n_Conv_1 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_2 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=64) + self.n_Conv_3 = nn.Conv2d(kernel_size=(3, 3), in_channels=64, out_channels=64) + self.n_Conv_4 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_5 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=64) + self.n_Conv_6 = nn.Conv2d(kernel_size=(3, 3), in_channels=64, out_channels=64) + self.n_Conv_7 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_8 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=64) + self.n_Conv_9 = nn.Conv2d(kernel_size=(3, 3), in_channels=64, out_channels=64) + self.n_Conv_10 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_11 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=512, stride=(2, 2)) + self.n_Conv_12 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=128) + self.n_Conv_13 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128, stride=(2, 2)) + self.n_Conv_14 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_15 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_16 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_17 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_18 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_19 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_20 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_21 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_22 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_23 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_24 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_25 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_26 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_27 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_28 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_29 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_30 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_31 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_32 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_33 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_34 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_35 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_36 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=1024, stride=(2, 2)) + self.n_Conv_37 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=256) + self.n_Conv_38 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256, stride=(2, 2)) + self.n_Conv_39 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_40 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_41 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_42 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_43 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_44 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_45 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_46 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_47 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_48 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_49 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_50 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_51 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_52 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_53 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_54 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_55 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_56 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_57 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_58 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_59 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_60 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_61 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_62 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_63 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_64 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_65 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_66 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_67 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_68 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_69 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_70 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_71 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_72 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_73 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_74 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_75 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_76 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_77 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_78 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_79 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_80 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_81 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_82 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_83 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_84 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_85 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_86 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_87 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_88 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_89 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_90 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_91 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_92 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_93 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_94 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_95 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_96 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_97 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_98 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256, stride=(2, 2)) + self.n_Conv_99 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_100 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=1024, stride=(2, 2)) + self.n_Conv_101 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_102 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_103 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_104 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_105 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_106 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_107 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_108 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_109 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_110 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_111 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_112 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_113 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_114 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_115 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_116 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_117 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_118 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_119 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_120 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_121 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_122 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_123 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_124 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_125 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_126 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_127 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_128 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_129 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_130 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_131 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_132 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_133 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_134 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_135 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_136 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_137 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_138 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_139 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_140 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_141 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_142 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_143 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_144 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_145 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_146 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_147 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_148 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_149 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_150 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_151 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_152 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_153 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_154 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_155 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_156 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_157 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_158 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=2048, stride=(2, 2)) + self.n_Conv_159 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=512) + self.n_Conv_160 = nn.Conv2d(kernel_size=(3, 3), in_channels=512, out_channels=512, stride=(2, 2)) + self.n_Conv_161 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=2048) + self.n_Conv_162 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=512) + self.n_Conv_163 = nn.Conv2d(kernel_size=(3, 3), in_channels=512, out_channels=512) + self.n_Conv_164 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=2048) + self.n_Conv_165 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=512) + self.n_Conv_166 = nn.Conv2d(kernel_size=(3, 3), in_channels=512, out_channels=512) + self.n_Conv_167 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=2048) + self.n_Conv_168 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=4096, stride=(2, 2)) + self.n_Conv_169 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=1024) + self.n_Conv_170 = nn.Conv2d(kernel_size=(3, 3), in_channels=1024, out_channels=1024, stride=(2, 2)) + self.n_Conv_171 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=4096) + self.n_Conv_172 = nn.Conv2d(kernel_size=(1, 1), in_channels=4096, out_channels=1024) + self.n_Conv_173 = nn.Conv2d(kernel_size=(3, 3), in_channels=1024, out_channels=1024) + self.n_Conv_174 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=4096) + self.n_Conv_175 = nn.Conv2d(kernel_size=(1, 1), in_channels=4096, out_channels=1024) + self.n_Conv_176 = nn.Conv2d(kernel_size=(3, 3), in_channels=1024, out_channels=1024) + self.n_Conv_177 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=4096) + self.n_Conv_178 = nn.Conv2d(kernel_size=(1, 1), in_channels=4096, out_channels=9176, bias=False) + + def forward(self, *inputs): + t_358, = inputs + t_359 = t_358.permute(*[0, 3, 1, 2]) + t_359_padded = F.pad(t_359, [2, 3, 2, 3], value=0) + t_360 = self.n_Conv_0(t_359_padded) + t_361 = F.relu(t_360) + t_361 = F.pad(t_361, [0, 1, 0, 1], value=float('-inf')) + t_362 = self.n_MaxPool_0(t_361) + t_363 = self.n_Conv_1(t_362) + t_364 = self.n_Conv_2(t_362) + t_365 = F.relu(t_364) + t_365_padded = F.pad(t_365, [1, 1, 1, 1], value=0) + t_366 = self.n_Conv_3(t_365_padded) + t_367 = F.relu(t_366) + t_368 = self.n_Conv_4(t_367) + t_369 = torch.add(t_368, t_363) + t_370 = F.relu(t_369) + t_371 = self.n_Conv_5(t_370) + t_372 = F.relu(t_371) + t_372_padded = F.pad(t_372, [1, 1, 1, 1], value=0) + t_373 = self.n_Conv_6(t_372_padded) + t_374 = F.relu(t_373) + t_375 = self.n_Conv_7(t_374) + t_376 = torch.add(t_375, t_370) + t_377 = F.relu(t_376) + t_378 = self.n_Conv_8(t_377) + t_379 = F.relu(t_378) + t_379_padded = F.pad(t_379, [1, 1, 1, 1], value=0) + t_380 = self.n_Conv_9(t_379_padded) + t_381 = F.relu(t_380) + t_382 = self.n_Conv_10(t_381) + t_383 = torch.add(t_382, t_377) + t_384 = F.relu(t_383) + t_385 = self.n_Conv_11(t_384) + t_386 = self.n_Conv_12(t_384) + t_387 = F.relu(t_386) + t_387_padded = F.pad(t_387, [0, 1, 0, 1], value=0) + t_388 = self.n_Conv_13(t_387_padded) + t_389 = F.relu(t_388) + t_390 = self.n_Conv_14(t_389) + t_391 = torch.add(t_390, t_385) + t_392 = F.relu(t_391) + t_393 = self.n_Conv_15(t_392) + t_394 = F.relu(t_393) + t_394_padded = F.pad(t_394, [1, 1, 1, 1], value=0) + t_395 = self.n_Conv_16(t_394_padded) + t_396 = F.relu(t_395) + t_397 = self.n_Conv_17(t_396) + t_398 = torch.add(t_397, t_392) + t_399 = F.relu(t_398) + t_400 = self.n_Conv_18(t_399) + t_401 = F.relu(t_400) + t_401_padded = F.pad(t_401, [1, 1, 1, 1], value=0) + t_402 = self.n_Conv_19(t_401_padded) + t_403 = F.relu(t_402) + t_404 = self.n_Conv_20(t_403) + t_405 = torch.add(t_404, t_399) + t_406 = F.relu(t_405) + t_407 = self.n_Conv_21(t_406) + t_408 = F.relu(t_407) + t_408_padded = F.pad(t_408, [1, 1, 1, 1], value=0) + t_409 = self.n_Conv_22(t_408_padded) + t_410 = F.relu(t_409) + t_411 = self.n_Conv_23(t_410) + t_412 = torch.add(t_411, t_406) + t_413 = F.relu(t_412) + t_414 = self.n_Conv_24(t_413) + t_415 = F.relu(t_414) + t_415_padded = F.pad(t_415, [1, 1, 1, 1], value=0) + t_416 = self.n_Conv_25(t_415_padded) + t_417 = F.relu(t_416) + t_418 = self.n_Conv_26(t_417) + t_419 = torch.add(t_418, t_413) + t_420 = F.relu(t_419) + t_421 = self.n_Conv_27(t_420) + t_422 = F.relu(t_421) + t_422_padded = F.pad(t_422, [1, 1, 1, 1], value=0) + t_423 = self.n_Conv_28(t_422_padded) + t_424 = F.relu(t_423) + t_425 = self.n_Conv_29(t_424) + t_426 = torch.add(t_425, t_420) + t_427 = F.relu(t_426) + t_428 = self.n_Conv_30(t_427) + t_429 = F.relu(t_428) + t_429_padded = F.pad(t_429, [1, 1, 1, 1], value=0) + t_430 = self.n_Conv_31(t_429_padded) + t_431 = F.relu(t_430) + t_432 = self.n_Conv_32(t_431) + t_433 = torch.add(t_432, t_427) + t_434 = F.relu(t_433) + t_435 = self.n_Conv_33(t_434) + t_436 = F.relu(t_435) + t_436_padded = F.pad(t_436, [1, 1, 1, 1], value=0) + t_437 = self.n_Conv_34(t_436_padded) + t_438 = F.relu(t_437) + t_439 = self.n_Conv_35(t_438) + t_440 = torch.add(t_439, t_434) + t_441 = F.relu(t_440) + t_442 = self.n_Conv_36(t_441) + t_443 = self.n_Conv_37(t_441) + t_444 = F.relu(t_443) + t_444_padded = F.pad(t_444, [0, 1, 0, 1], value=0) + t_445 = self.n_Conv_38(t_444_padded) + t_446 = F.relu(t_445) + t_447 = self.n_Conv_39(t_446) + t_448 = torch.add(t_447, t_442) + t_449 = F.relu(t_448) + t_450 = self.n_Conv_40(t_449) + t_451 = F.relu(t_450) + t_451_padded = F.pad(t_451, [1, 1, 1, 1], value=0) + t_452 = self.n_Conv_41(t_451_padded) + t_453 = F.relu(t_452) + t_454 = self.n_Conv_42(t_453) + t_455 = torch.add(t_454, t_449) + t_456 = F.relu(t_455) + t_457 = self.n_Conv_43(t_456) + t_458 = F.relu(t_457) + t_458_padded = F.pad(t_458, [1, 1, 1, 1], value=0) + t_459 = self.n_Conv_44(t_458_padded) + t_460 = F.relu(t_459) + t_461 = self.n_Conv_45(t_460) + t_462 = torch.add(t_461, t_456) + t_463 = F.relu(t_462) + t_464 = self.n_Conv_46(t_463) + t_465 = F.relu(t_464) + t_465_padded = F.pad(t_465, [1, 1, 1, 1], value=0) + t_466 = self.n_Conv_47(t_465_padded) + t_467 = F.relu(t_466) + t_468 = self.n_Conv_48(t_467) + t_469 = torch.add(t_468, t_463) + t_470 = F.relu(t_469) + t_471 = self.n_Conv_49(t_470) + t_472 = F.relu(t_471) + t_472_padded = F.pad(t_472, [1, 1, 1, 1], value=0) + t_473 = self.n_Conv_50(t_472_padded) + t_474 = F.relu(t_473) + t_475 = self.n_Conv_51(t_474) + t_476 = torch.add(t_475, t_470) + t_477 = F.relu(t_476) + t_478 = self.n_Conv_52(t_477) + t_479 = F.relu(t_478) + t_479_padded = F.pad(t_479, [1, 1, 1, 1], value=0) + t_480 = self.n_Conv_53(t_479_padded) + t_481 = F.relu(t_480) + t_482 = self.n_Conv_54(t_481) + t_483 = torch.add(t_482, t_477) + t_484 = F.relu(t_483) + t_485 = self.n_Conv_55(t_484) + t_486 = F.relu(t_485) + t_486_padded = F.pad(t_486, [1, 1, 1, 1], value=0) + t_487 = self.n_Conv_56(t_486_padded) + t_488 = F.relu(t_487) + t_489 = self.n_Conv_57(t_488) + t_490 = torch.add(t_489, t_484) + t_491 = F.relu(t_490) + t_492 = self.n_Conv_58(t_491) + t_493 = F.relu(t_492) + t_493_padded = F.pad(t_493, [1, 1, 1, 1], value=0) + t_494 = self.n_Conv_59(t_493_padded) + t_495 = F.relu(t_494) + t_496 = self.n_Conv_60(t_495) + t_497 = torch.add(t_496, t_491) + t_498 = F.relu(t_497) + t_499 = self.n_Conv_61(t_498) + t_500 = F.relu(t_499) + t_500_padded = F.pad(t_500, [1, 1, 1, 1], value=0) + t_501 = self.n_Conv_62(t_500_padded) + t_502 = F.relu(t_501) + t_503 = self.n_Conv_63(t_502) + t_504 = torch.add(t_503, t_498) + t_505 = F.relu(t_504) + t_506 = self.n_Conv_64(t_505) + t_507 = F.relu(t_506) + t_507_padded = F.pad(t_507, [1, 1, 1, 1], value=0) + t_508 = self.n_Conv_65(t_507_padded) + t_509 = F.relu(t_508) + t_510 = self.n_Conv_66(t_509) + t_511 = torch.add(t_510, t_505) + t_512 = F.relu(t_511) + t_513 = self.n_Conv_67(t_512) + t_514 = F.relu(t_513) + t_514_padded = F.pad(t_514, [1, 1, 1, 1], value=0) + t_515 = self.n_Conv_68(t_514_padded) + t_516 = F.relu(t_515) + t_517 = self.n_Conv_69(t_516) + t_518 = torch.add(t_517, t_512) + t_519 = F.relu(t_518) + t_520 = self.n_Conv_70(t_519) + t_521 = F.relu(t_520) + t_521_padded = F.pad(t_521, [1, 1, 1, 1], value=0) + t_522 = self.n_Conv_71(t_521_padded) + t_523 = F.relu(t_522) + t_524 = self.n_Conv_72(t_523) + t_525 = torch.add(t_524, t_519) + t_526 = F.relu(t_525) + t_527 = self.n_Conv_73(t_526) + t_528 = F.relu(t_527) + t_528_padded = F.pad(t_528, [1, 1, 1, 1], value=0) + t_529 = self.n_Conv_74(t_528_padded) + t_530 = F.relu(t_529) + t_531 = self.n_Conv_75(t_530) + t_532 = torch.add(t_531, t_526) + t_533 = F.relu(t_532) + t_534 = self.n_Conv_76(t_533) + t_535 = F.relu(t_534) + t_535_padded = F.pad(t_535, [1, 1, 1, 1], value=0) + t_536 = self.n_Conv_77(t_535_padded) + t_537 = F.relu(t_536) + t_538 = self.n_Conv_78(t_537) + t_539 = torch.add(t_538, t_533) + t_540 = F.relu(t_539) + t_541 = self.n_Conv_79(t_540) + t_542 = F.relu(t_541) + t_542_padded = F.pad(t_542, [1, 1, 1, 1], value=0) + t_543 = self.n_Conv_80(t_542_padded) + t_544 = F.relu(t_543) + t_545 = self.n_Conv_81(t_544) + t_546 = torch.add(t_545, t_540) + t_547 = F.relu(t_546) + t_548 = self.n_Conv_82(t_547) + t_549 = F.relu(t_548) + t_549_padded = F.pad(t_549, [1, 1, 1, 1], value=0) + t_550 = self.n_Conv_83(t_549_padded) + t_551 = F.relu(t_550) + t_552 = self.n_Conv_84(t_551) + t_553 = torch.add(t_552, t_547) + t_554 = F.relu(t_553) + t_555 = self.n_Conv_85(t_554) + t_556 = F.relu(t_555) + t_556_padded = F.pad(t_556, [1, 1, 1, 1], value=0) + t_557 = self.n_Conv_86(t_556_padded) + t_558 = F.relu(t_557) + t_559 = self.n_Conv_87(t_558) + t_560 = torch.add(t_559, t_554) + t_561 = F.relu(t_560) + t_562 = self.n_Conv_88(t_561) + t_563 = F.relu(t_562) + t_563_padded = F.pad(t_563, [1, 1, 1, 1], value=0) + t_564 = self.n_Conv_89(t_563_padded) + t_565 = F.relu(t_564) + t_566 = self.n_Conv_90(t_565) + t_567 = torch.add(t_566, t_561) + t_568 = F.relu(t_567) + t_569 = self.n_Conv_91(t_568) + t_570 = F.relu(t_569) + t_570_padded = F.pad(t_570, [1, 1, 1, 1], value=0) + t_571 = self.n_Conv_92(t_570_padded) + t_572 = F.relu(t_571) + t_573 = self.n_Conv_93(t_572) + t_574 = torch.add(t_573, t_568) + t_575 = F.relu(t_574) + t_576 = self.n_Conv_94(t_575) + t_577 = F.relu(t_576) + t_577_padded = F.pad(t_577, [1, 1, 1, 1], value=0) + t_578 = self.n_Conv_95(t_577_padded) + t_579 = F.relu(t_578) + t_580 = self.n_Conv_96(t_579) + t_581 = torch.add(t_580, t_575) + t_582 = F.relu(t_581) + t_583 = self.n_Conv_97(t_582) + t_584 = F.relu(t_583) + t_584_padded = F.pad(t_584, [0, 1, 0, 1], value=0) + t_585 = self.n_Conv_98(t_584_padded) + t_586 = F.relu(t_585) + t_587 = self.n_Conv_99(t_586) + t_588 = self.n_Conv_100(t_582) + t_589 = torch.add(t_587, t_588) + t_590 = F.relu(t_589) + t_591 = self.n_Conv_101(t_590) + t_592 = F.relu(t_591) + t_592_padded = F.pad(t_592, [1, 1, 1, 1], value=0) + t_593 = self.n_Conv_102(t_592_padded) + t_594 = F.relu(t_593) + t_595 = self.n_Conv_103(t_594) + t_596 = torch.add(t_595, t_590) + t_597 = F.relu(t_596) + t_598 = self.n_Conv_104(t_597) + t_599 = F.relu(t_598) + t_599_padded = F.pad(t_599, [1, 1, 1, 1], value=0) + t_600 = self.n_Conv_105(t_599_padded) + t_601 = F.relu(t_600) + t_602 = self.n_Conv_106(t_601) + t_603 = torch.add(t_602, t_597) + t_604 = F.relu(t_603) + t_605 = self.n_Conv_107(t_604) + t_606 = F.relu(t_605) + t_606_padded = F.pad(t_606, [1, 1, 1, 1], value=0) + t_607 = self.n_Conv_108(t_606_padded) + t_608 = F.relu(t_607) + t_609 = self.n_Conv_109(t_608) + t_610 = torch.add(t_609, t_604) + t_611 = F.relu(t_610) + t_612 = self.n_Conv_110(t_611) + t_613 = F.relu(t_612) + t_613_padded = F.pad(t_613, [1, 1, 1, 1], value=0) + t_614 = self.n_Conv_111(t_613_padded) + t_615 = F.relu(t_614) + t_616 = self.n_Conv_112(t_615) + t_617 = torch.add(t_616, t_611) + t_618 = F.relu(t_617) + t_619 = self.n_Conv_113(t_618) + t_620 = F.relu(t_619) + t_620_padded = F.pad(t_620, [1, 1, 1, 1], value=0) + t_621 = self.n_Conv_114(t_620_padded) + t_622 = F.relu(t_621) + t_623 = self.n_Conv_115(t_622) + t_624 = torch.add(t_623, t_618) + t_625 = F.relu(t_624) + t_626 = self.n_Conv_116(t_625) + t_627 = F.relu(t_626) + t_627_padded = F.pad(t_627, [1, 1, 1, 1], value=0) + t_628 = self.n_Conv_117(t_627_padded) + t_629 = F.relu(t_628) + t_630 = self.n_Conv_118(t_629) + t_631 = torch.add(t_630, t_625) + t_632 = F.relu(t_631) + t_633 = self.n_Conv_119(t_632) + t_634 = F.relu(t_633) + t_634_padded = F.pad(t_634, [1, 1, 1, 1], value=0) + t_635 = self.n_Conv_120(t_634_padded) + t_636 = F.relu(t_635) + t_637 = self.n_Conv_121(t_636) + t_638 = torch.add(t_637, t_632) + t_639 = F.relu(t_638) + t_640 = self.n_Conv_122(t_639) + t_641 = F.relu(t_640) + t_641_padded = F.pad(t_641, [1, 1, 1, 1], value=0) + t_642 = self.n_Conv_123(t_641_padded) + t_643 = F.relu(t_642) + t_644 = self.n_Conv_124(t_643) + t_645 = torch.add(t_644, t_639) + t_646 = F.relu(t_645) + t_647 = self.n_Conv_125(t_646) + t_648 = F.relu(t_647) + t_648_padded = F.pad(t_648, [1, 1, 1, 1], value=0) + t_649 = self.n_Conv_126(t_648_padded) + t_650 = F.relu(t_649) + t_651 = self.n_Conv_127(t_650) + t_652 = torch.add(t_651, t_646) + t_653 = F.relu(t_652) + t_654 = self.n_Conv_128(t_653) + t_655 = F.relu(t_654) + t_655_padded = F.pad(t_655, [1, 1, 1, 1], value=0) + t_656 = self.n_Conv_129(t_655_padded) + t_657 = F.relu(t_656) + t_658 = self.n_Conv_130(t_657) + t_659 = torch.add(t_658, t_653) + t_660 = F.relu(t_659) + t_661 = self.n_Conv_131(t_660) + t_662 = F.relu(t_661) + t_662_padded = F.pad(t_662, [1, 1, 1, 1], value=0) + t_663 = self.n_Conv_132(t_662_padded) + t_664 = F.relu(t_663) + t_665 = self.n_Conv_133(t_664) + t_666 = torch.add(t_665, t_660) + t_667 = F.relu(t_666) + t_668 = self.n_Conv_134(t_667) + t_669 = F.relu(t_668) + t_669_padded = F.pad(t_669, [1, 1, 1, 1], value=0) + t_670 = self.n_Conv_135(t_669_padded) + t_671 = F.relu(t_670) + t_672 = self.n_Conv_136(t_671) + t_673 = torch.add(t_672, t_667) + t_674 = F.relu(t_673) + t_675 = self.n_Conv_137(t_674) + t_676 = F.relu(t_675) + t_676_padded = F.pad(t_676, [1, 1, 1, 1], value=0) + t_677 = self.n_Conv_138(t_676_padded) + t_678 = F.relu(t_677) + t_679 = self.n_Conv_139(t_678) + t_680 = torch.add(t_679, t_674) + t_681 = F.relu(t_680) + t_682 = self.n_Conv_140(t_681) + t_683 = F.relu(t_682) + t_683_padded = F.pad(t_683, [1, 1, 1, 1], value=0) + t_684 = self.n_Conv_141(t_683_padded) + t_685 = F.relu(t_684) + t_686 = self.n_Conv_142(t_685) + t_687 = torch.add(t_686, t_681) + t_688 = F.relu(t_687) + t_689 = self.n_Conv_143(t_688) + t_690 = F.relu(t_689) + t_690_padded = F.pad(t_690, [1, 1, 1, 1], value=0) + t_691 = self.n_Conv_144(t_690_padded) + t_692 = F.relu(t_691) + t_693 = self.n_Conv_145(t_692) + t_694 = torch.add(t_693, t_688) + t_695 = F.relu(t_694) + t_696 = self.n_Conv_146(t_695) + t_697 = F.relu(t_696) + t_697_padded = F.pad(t_697, [1, 1, 1, 1], value=0) + t_698 = self.n_Conv_147(t_697_padded) + t_699 = F.relu(t_698) + t_700 = self.n_Conv_148(t_699) + t_701 = torch.add(t_700, t_695) + t_702 = F.relu(t_701) + t_703 = self.n_Conv_149(t_702) + t_704 = F.relu(t_703) + t_704_padded = F.pad(t_704, [1, 1, 1, 1], value=0) + t_705 = self.n_Conv_150(t_704_padded) + t_706 = F.relu(t_705) + t_707 = self.n_Conv_151(t_706) + t_708 = torch.add(t_707, t_702) + t_709 = F.relu(t_708) + t_710 = self.n_Conv_152(t_709) + t_711 = F.relu(t_710) + t_711_padded = F.pad(t_711, [1, 1, 1, 1], value=0) + t_712 = self.n_Conv_153(t_711_padded) + t_713 = F.relu(t_712) + t_714 = self.n_Conv_154(t_713) + t_715 = torch.add(t_714, t_709) + t_716 = F.relu(t_715) + t_717 = self.n_Conv_155(t_716) + t_718 = F.relu(t_717) + t_718_padded = F.pad(t_718, [1, 1, 1, 1], value=0) + t_719 = self.n_Conv_156(t_718_padded) + t_720 = F.relu(t_719) + t_721 = self.n_Conv_157(t_720) + t_722 = torch.add(t_721, t_716) + t_723 = F.relu(t_722) + t_724 = self.n_Conv_158(t_723) + t_725 = self.n_Conv_159(t_723) + t_726 = F.relu(t_725) + t_726_padded = F.pad(t_726, [0, 1, 0, 1], value=0) + t_727 = self.n_Conv_160(t_726_padded) + t_728 = F.relu(t_727) + t_729 = self.n_Conv_161(t_728) + t_730 = torch.add(t_729, t_724) + t_731 = F.relu(t_730) + t_732 = self.n_Conv_162(t_731) + t_733 = F.relu(t_732) + t_733_padded = F.pad(t_733, [1, 1, 1, 1], value=0) + t_734 = self.n_Conv_163(t_733_padded) + t_735 = F.relu(t_734) + t_736 = self.n_Conv_164(t_735) + t_737 = torch.add(t_736, t_731) + t_738 = F.relu(t_737) + t_739 = self.n_Conv_165(t_738) + t_740 = F.relu(t_739) + t_740_padded = F.pad(t_740, [1, 1, 1, 1], value=0) + t_741 = self.n_Conv_166(t_740_padded) + t_742 = F.relu(t_741) + t_743 = self.n_Conv_167(t_742) + t_744 = torch.add(t_743, t_738) + t_745 = F.relu(t_744) + t_746 = self.n_Conv_168(t_745) + t_747 = self.n_Conv_169(t_745) + t_748 = F.relu(t_747) + t_748_padded = F.pad(t_748, [0, 1, 0, 1], value=0) + t_749 = self.n_Conv_170(t_748_padded) + t_750 = F.relu(t_749) + t_751 = self.n_Conv_171(t_750) + t_752 = torch.add(t_751, t_746) + t_753 = F.relu(t_752) + t_754 = self.n_Conv_172(t_753) + t_755 = F.relu(t_754) + t_755_padded = F.pad(t_755, [1, 1, 1, 1], value=0) + t_756 = self.n_Conv_173(t_755_padded) + t_757 = F.relu(t_756) + t_758 = self.n_Conv_174(t_757) + t_759 = torch.add(t_758, t_753) + t_760 = F.relu(t_759) + t_761 = self.n_Conv_175(t_760) + t_762 = F.relu(t_761) + t_762_padded = F.pad(t_762, [1, 1, 1, 1], value=0) + t_763 = self.n_Conv_176(t_762_padded) + t_764 = F.relu(t_763) + t_765 = self.n_Conv_177(t_764) + t_766 = torch.add(t_765, t_760) + t_767 = F.relu(t_766) + t_768 = self.n_Conv_178(t_767) + t_769 = F.avg_pool2d(t_768, kernel_size=t_768.shape[-2:]) + t_770 = torch.squeeze(t_769, 3) + t_770 = torch.squeeze(t_770, 2) + t_771 = torch.sigmoid(t_770) + return t_771 + + def load_state_dict(self, state_dict, **kwargs): + self.tags = state_dict.get('tags', []) + + super(DeepDanbooruModel, self).load_state_dict({k: v for k, v in state_dict.items() if k != 'tags'}) + diff --git a/modules/shared.py b/modules/shared.py index a4457305..c93ae2a3 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -55,7 +55,7 @@ parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with parser.add_argument("--clip-models-path", type=str, help="Path to directory with CLIP model file(s).", default=None) parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers") parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work") -parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator") +parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything") parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.") parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 488aa5b5..56b9b2eb 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -6,12 +6,10 @@ import sys import tqdm import time -from modules import shared, images +from modules import shared, images, deepbooru from modules.paths import models_path from modules.shared import opts, cmd_opts from modules.textual_inversion import autocrop -if cmd_opts.deepdanbooru: - import modules.deepbooru as deepbooru def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False): @@ -20,9 +18,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce shared.interrogator.load() if process_caption_deepbooru: - db_opts = deepbooru.create_deepbooru_opts() - db_opts[deepbooru.OPT_INCLUDE_RANKS] = False - deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) + deepbooru.model.start() preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug) @@ -32,7 +28,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce shared.interrogator.send_blip_to_ram() if process_caption_deepbooru: - deepbooru.release_process() + deepbooru.model.stop() def listfiles(dirname): @@ -58,7 +54,7 @@ def save_pic_with_caption(image, index, params: PreprocessParams, existing_capti if params.process_caption_deepbooru: if len(caption) > 0: caption += ", " - caption += deepbooru.get_tags_from_process(image) + caption += deepbooru.model.tag_multi(image) filename_part = params.src filename_part = os.path.splitext(filename_part)[0] diff --git a/modules/ui.py b/modules/ui.py index a5953fce..e6da1b2a 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -19,14 +19,11 @@ import numpy as np from PIL import Image, PngImagePlugin -from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions +from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru from modules.paths import script_path from modules.shared import opts, cmd_opts, restricted_opts -if cmd_opts.deepdanbooru: - from modules.deepbooru import get_deepbooru_tags - import modules.codeformer_model import modules.generation_parameters_copypaste as parameters_copypaste import modules.gfpgan_model @@ -352,7 +349,7 @@ def interrogate(image): def interrogate_deepbooru(image): - prompt = get_deepbooru_tags(image) + prompt = deepbooru.model.tag(image) return gr_show(True) if prompt is None else prompt -- cgit v1.2.3 From 563ea3f6ff66e0eba44033163d24e42297465a47 Mon Sep 17 00:00:00 2001 From: brkirch Date: Mon, 21 Nov 2022 02:56:00 -0500 Subject: Change .cuda() to .to(devices.device) --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index b9066d81..31ec7e17 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -58,7 +58,7 @@ class DeepDanbooru: a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255 with torch.no_grad(), devices.autocast(): - x = torch.from_numpy(a).cuda() + x = torch.from_numpy(a).to(devices.device) y = self.model(x)[0].detach().cpu().numpy() probability_dict = {} -- cgit v1.2.3 From 60bd4d52a658838c5ee3f6ddfe8d4db55cf1d764 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 3 Dec 2022 18:46:09 +0300 Subject: fix incorrect file extension filter for deepdanbooru models --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 31ec7e17..dfc83357 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -21,7 +21,7 @@ class DeepDanbooru: files = modelloader.load_models( model_path=os.path.join(paths.models_path, "torch_deepdanbooru"), model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt', - ext_filter=".pt", + ext_filter=[".pt"], download_name='model-resnet_custom_v3.pt', ) -- cgit v1.2.3 From 03d7b394539558f6f560155d87a4fc66eb675e30 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 24 Dec 2022 12:40:32 +0300 Subject: added an option to filter out deepbooru tags --- modules/deepbooru.py | 4 +++- modules/shared.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'modules/deepbooru.py') diff --git a/modules/deepbooru.py b/modules/deepbooru.py index dfc83357..122fce7f 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -79,7 +79,9 @@ class DeepDanbooru: res = [] - for tag in tags: + filtertags = set([x.strip().replace(' ', '_') for x in shared.opts.deepbooru_filter_tags.split(",")]) + + for tag in [x for x in tags if x not in filtertags]: probability = probability_dict[tag] tag_outformat = tag if use_spaces: diff --git a/modules/shared.py b/modules/shared.py index 8ea3b441..a75de535 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -386,6 +386,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"), "deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"), "deepbooru_use_spaces": OptionInfo(False, "use spaces for tags in deepbooru"), "deepbooru_escape": OptionInfo(True, "escape (\\) brackets in deepbooru (so they are used as literal brackets and not for emphasis)"), + "deepbooru_filter_tags": OptionInfo("", "filter out those tags from deepbooru output (separated by comma)"), })) options_templates.update(options_section(('ui', "User interface"), { -- cgit v1.2.3