Merge pull request #1752 from Greendayle/dev/deepdanbooru

Added DeepDanbooru interrogator
author: AUTOMATIC1111 <16777216c@gmail.com> 2022-10-09 07:52:21 +0000
committer: GitHub <noreply@github.com> 2022-10-09 07:52:21 +0000
commit: e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318 (patch)
tree: c01de5d0efb2f6bb414722e398e29316ecafb74f
parent: 14192c5b207b16b1ec7a4c9c4ea538d1a6811a4d (diff)
parent: 0ec80f0125a14c03ac860279f40c0c062dbde0cf (diff)
download: stable-diffusion-webui-gfx803-e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318.tar.gz
stable-diffusion-webui-gfx803-e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318.tar.bz2
stable-diffusion-webui-gfx803-e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318.zip
7 files changed, 110 insertions, 6 deletions
diff --git a/README.md b/README.md
index 63dd0c18..561eb03d 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
      - separate prompts using uppercase `AND`
      - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
 - No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
+- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
 
 ## Installation and Running
 Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
@@ -123,4 +124,5 @@ The documentation was moved from this README over to the project's [wiki](https:
 - Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
 - CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator
 - Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
+- DeepDanbooru - interrogator for anime diffusors https://github.com/KichangKim/DeepDanbooru
 - (You)
diff --git a/launch.py b/launch.py
index 1d65a779..b0a59b6a 100644
--- a/launch.py
+++ b/launch.py
@@ -33,6 +33,7 @@ def extract_arg(args, name):
 
 args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
 xformers = '--xformers' in args
+deepdanbooru = '--deepdanbooru' in args
 
 
 def repo_dir(name):
@@ -132,6 +133,9 @@ if not is_installed("xformers") and xformers and platform.python_version().start
     elif platform.system() == "Linux":
         run_pip("install xformers", "xformers")
 
+if not is_installed("deepdanbooru") and deepdanbooru:
+    run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")
+
 os.makedirs(dir_repos, exist_ok=True)
 
 git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)
diff --git a/models/deepbooru/Put your deepbooru release project folder here.txt b/models/deepbooru/Put your deepbooru release project folder here.txt
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/models/deepbooru/Put your deepbooru release project folder here.txt
diff --git a/modules/deepbooru.py b/modules/deepbooru.py
new file mode 100644
index 00000000..7e3c0618
--- /dev/null
+++ b/modules/deepbooru.py
@@ -0,0 +1,73 @@
+import os.path
+from concurrent.futures import ProcessPoolExecutor
+from multiprocessing import get_context
+
+
+def _load_tf_and_return_tags(pil_image, threshold):  # returns the tags scoring >= threshold as one ', '-joined string
+    import deepdanbooru as dd  # third-party imports are done inside the function, not at module import time
+    import tensorflow as tf
+    import numpy as np
+
+    this_folder = os.path.dirname(__file__)
+    model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))  # ../models/deepbooru relative to this file
+    if not os.path.exists(os.path.join(model_path, 'project.json')):  # no project.json yet: download and unpack the model first
+        # only needed for the model download branch, so import them here rather than on every call
+        import zipfile
+        from basicsr.utils.download_util import load_file_from_url
+        load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
+                           model_path)
+        with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
+            zip_ref.extractall(model_path)
+        os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))  # remove the archive once it has been extracted
+
+    tags = dd.project.load_tags_from_project(model_path)  # NOTE(review): tags and model are re-loaded on every call
+    model = dd.project.load_model_from_project(
+        model_path, compile_model=True
+    )
+
+    width = model.input_shape[2]  # presumably input_shape is (batch, height, width, channels) - TODO confirm
+    height = model.input_shape[1]
+    image = np.array(pil_image)  # NOTE(review): no mode conversion here; assumes pil_image already has the channel count the model expects
+    image = tf.image.resize(
+        image,
+        size=(height, width),
+        method=tf.image.ResizeMethod.AREA,
+        preserve_aspect_ratio=True,
+    )
+    image = image.numpy()  # EagerTensor to np.array
+    image = dd.image.transform_and_pad_image(image, width, height)  # presumably pads to exactly width x height - confirm against deepdanbooru
+    image = image / 255.0
+    image_shape = image.shape
+    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))  # prepend a batch dimension of size 1
+
+    y = model.predict(image)[0]  # [0]: scores for the single image in the batch
+
+    result_dict = {}
+
+    for i, tag in enumerate(tags):  # y[i] is used as the score of the i-th tag
+        result_dict[tag] = y[i]
+    result_tags_out = []
+    result_tags_print = []
+    for tag in tags:
+        if result_dict[tag] >= threshold:
+            if tag.startswith("rating:"):  # rating:* tags are never returned or printed
+                continue
+            result_tags_out.append(tag)
+            result_tags_print.append(f'{result_dict[tag]} {tag}')  # "<score> <tag>" line for the console listing
+
+    print('\n'.join(sorted(result_tags_print, reverse=True)))  # NOTE(review): sorted as strings (descending), not by numeric score
+
+    return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ')  # tags keep their original order; '_' and ':' become spaces
+
+
+def subprocess_init_no_cuda():  # used as the ProcessPoolExecutor initializer in get_deepbooru_tags
+    import os
+    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # intended to hide all CUDA devices from the worker process
+
+
+def get_deepbooru_tags(pil_image, threshold=0.5):  # runs _load_tf_and_return_tags in a separate process and returns its tag string
+    context = get_context('spawn')  # 'spawn' start method for the worker process
+    with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor:  # a new pool is created per call and shut down on exit
+        f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, )  # arguments are sent to the worker, so pil_image must be picklable
+        ret = f.result()  # blocks until the worker finishes; will rethrow any exceptions
+    return ret
+\ No newline at end of file
diff --git a/modules/shared.py b/modules/shared.py
index 2dc092d6..b2c76a32 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -45,6 +45,7 @@ parser.add_argument("--swinir-models-path", type=str, help="Path to directory wi
 parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
 parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
 parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
+parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator")
 parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
 parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
 parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
diff --git a/modules/ui.py b/modules/ui.py
index debd8873..8071b1cb 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -25,6 +25,8 @@ import gradio.routes
 from modules import sd_hijack
 from modules.paths import script_path
 from modules.shared import opts, cmd_opts
+if cmd_opts.deepdanbooru:
+    from modules.deepbooru import get_deepbooru_tags
 import modules.shared as shared
 from modules.sd_samplers import samplers, samplers_for_img2img
 from modules.sd_hijack import model_hijack
@@ -308,6 +310,11 @@ def interrogate(image):
     return gr_show(True) if prompt is None else prompt
 
 
+def interrogate_deepbooru(image):  # get_deepbooru_tags is only imported when cmd_opts.deepdanbooru is set
+    prompt = get_deepbooru_tags(image)  # uses the default threshold of get_deepbooru_tags
+    return gr_show(True) if prompt is None else prompt  # same return pattern as interrogate() directly above
+
+
 def create_seed_inputs():
     with gr.Row():
         with gr.Box():
@@ -444,15 +451,20 @@ def create_toprow(is_img2img):
                     outputs=[],
                 )
 
-            with gr.Row():
+            with gr.Row(scale=1):
                 if is_img2img:
-                    interrogate = gr.Button('Interrogate', elem_id="interrogate")
+                    interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate")
+                    if cmd_opts.deepdanbooru:
+                        deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru")
+                    else:
+                        deepbooru = None
                 else:
                     interrogate = None
+                    deepbooru = None
                 prompt_style_apply = gr.Button('Apply style', elem_id="style_apply")
                 save_style = gr.Button('Create style', elem_id="style_create")
 
-    return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, prompt_style_apply, save_style, paste, token_counter, token_button
+    return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button
 
 
 def setup_progressbar(progressbar, preview, id_part, textinfo=None):
@@ -481,7 +493,7 @@ def create_ui(wrap_gradio_gpu_call):
     import modules.txt2img
 
     with gr.Blocks(analytics_enabled=False) as txt2img_interface:
-        txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
+        txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
         dummy_component = gr.Label(visible=False)
 
         with gr.Row(elem_id='txt2img_progress_row'):
@@ -641,7 +653,7 @@ def create_ui(wrap_gradio_gpu_call):
             token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter])
 
     with gr.Blocks(analytics_enabled=False) as img2img_interface:
-        img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
+        img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
 
         with gr.Row(elem_id='img2img_progress_row'):
             with gr.Column(scale=1):
@@ -804,6 +816,13 @@ def create_ui(wrap_gradio_gpu_call):
                 outputs=[img2img_prompt],
             )
 
+            if cmd_opts.deepdanbooru:
+                img2img_deepbooru.click(
+                    fn=interrogate_deepbooru,
+                    inputs=[init_img],
+                    outputs=[img2img_prompt],
+                )
+
             save.click(
                 fn=wrap_gradio_call(save_files),
                 _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
diff --git a/style.css b/style.css
index 6904fc50..101d2052 100644
--- a/style.css
+++ b/style.css
@@ -103,7 +103,12 @@
 
 #style_apply, #style_create, #interrogate{
     margin: 0.75em 0.25em 0.25em 0.25em;
-    min-width: 3em;
+    min-width: 5em;
+}
+
+#style_apply, #style_create, #deepbooru{
+    margin: 0.75em 0.25em 0.25em 0.25em;
+    min-width: 5em;
 }
 
 #style_pos_col, #style_neg_col{
author	AUTOMATIC1111 <16777216c@gmail.com>	2022-10-09 07:52:21 +0000
committer	GitHub <noreply@github.com>	2022-10-09 07:52:21 +0000
commit	e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318 (patch)
tree	c01de5d0efb2f6bb414722e398e29316ecafb74f
parent	14192c5b207b16b1ec7a4c9c4ea538d1a6811a4d (diff)
parent	0ec80f0125a14c03ac860279f40c0c062dbde0cf (diff)
download	stable-diffusion-webui-gfx803-e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318.tar.gz stable-diffusion-webui-gfx803-e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318.tar.bz2 stable-diffusion-webui-gfx803-e00b4df7c6f0a13941d6f6ea425eebdaa2bc9318.zip