diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/deepbooru.py | 73 | ||||
-rw-r--r-- | modules/shared.py | 1 | ||||
-rw-r--r-- | modules/ui.py | 29 |
3 files changed, 98 insertions, 5 deletions
diff --git a/modules/deepbooru.py b/modules/deepbooru.py new file mode 100644 index 00000000..7e3c0618 --- /dev/null +++ b/modules/deepbooru.py @@ -0,0 +1,73 @@ +import os.path +from concurrent.futures import ProcessPoolExecutor +from multiprocessing import get_context + + +def _load_tf_and_return_tags(pil_image, threshold): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np + + this_folder = os.path.dirname(__file__) + model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) + if not os.path.exists(os.path.join(model_path, 'project.json')): + # there is no point importing these every time + import zipfile + from basicsr.utils.download_util import load_file_from_url + load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", + model_path) + with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: + zip_ref.extractall(model_path) + os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) + + tags = dd.project.load_tags_from_project(model_path) + model = dd.project.load_model_from_project( + model_path, compile_model=True + ) + + width = model.input_shape[2] + height = model.input_shape[1] + image = np.array(pil_image) + image = tf.image.resize( + image, + size=(height, width), + method=tf.image.ResizeMethod.AREA, + preserve_aspect_ratio=True, + ) + image = image.numpy() # EagerTensor to np.array + image = dd.image.transform_and_pad_image(image, width, height) + image = image / 255.0 + image_shape = image.shape + image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2])) + + y = model.predict(image)[0] + + result_dict = {} + + for i, tag in enumerate(tags): + result_dict[tag] = y[i] + result_tags_out = [] + result_tags_print = [] + for tag in tags: + if result_dict[tag] >= threshold: + if tag.startswith("rating:"): + continue + result_tags_out.append(tag) + result_tags_print.append(f'{result_dict[tag]} {tag}') + + print('\n'.join(sorted(result_tags_print, reverse=True))) + + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') + + +def subprocess_init_no_cuda(): + import os + os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + + +def get_deepbooru_tags(pil_image, threshold=0.5): + context = get_context('spawn') + with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor: + f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, ) + ret = f.result() # will rethrow any exceptions + return ret
\ No newline at end of file diff --git a/modules/shared.py b/modules/shared.py index 2dc092d6..b2c76a32 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -45,6 +45,7 @@ parser.add_argument("--swinir-models-path", type=str, help="Path to directory wi parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
+parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator")
parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
diff --git a/modules/ui.py b/modules/ui.py index debd8873..8071b1cb 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -25,6 +25,8 @@ import gradio.routes from modules import sd_hijack
from modules.paths import script_path
from modules.shared import opts, cmd_opts
+if cmd_opts.deepdanbooru:
+ from modules.deepbooru import get_deepbooru_tags
import modules.shared as shared
from modules.sd_samplers import samplers, samplers_for_img2img
from modules.sd_hijack import model_hijack
@@ -308,6 +310,11 @@ def interrogate(image): return gr_show(True) if prompt is None else prompt
+def interrogate_deepbooru(image):
+ prompt = get_deepbooru_tags(image)
+ return gr_show(True) if prompt is None else prompt
+
+
def create_seed_inputs():
with gr.Row():
with gr.Box():
@@ -444,15 +451,20 @@ def create_toprow(is_img2img): outputs=[],
)
- with gr.Row():
+ with gr.Row(scale=1):
if is_img2img:
- interrogate = gr.Button('Interrogate', elem_id="interrogate")
+ interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate")
+ if cmd_opts.deepdanbooru:
+ deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru")
+ else:
+ deepbooru = None
else:
interrogate = None
+ deepbooru = None
prompt_style_apply = gr.Button('Apply style', elem_id="style_apply")
save_style = gr.Button('Create style', elem_id="style_create")
- return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, prompt_style_apply, save_style, paste, token_counter, token_button
+ return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button
def setup_progressbar(progressbar, preview, id_part, textinfo=None):
@@ -481,7 +493,7 @@ def create_ui(wrap_gradio_gpu_call): import modules.txt2img
with gr.Blocks(analytics_enabled=False) as txt2img_interface:
- txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
+ txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
dummy_component = gr.Label(visible=False)
with gr.Row(elem_id='txt2img_progress_row'):
@@ -641,7 +653,7 @@ def create_ui(wrap_gradio_gpu_call): token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter])
with gr.Blocks(analytics_enabled=False) as img2img_interface:
- img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
+ img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
with gr.Row(elem_id='img2img_progress_row'):
with gr.Column(scale=1):
@@ -804,6 +816,13 @@ def create_ui(wrap_gradio_gpu_call): outputs=[img2img_prompt],
)
+ if cmd_opts.deepdanbooru:
+ img2img_deepbooru.click(
+ fn=interrogate_deepbooru,
+ inputs=[init_img],
+ outputs=[img2img_prompt],
+ )
+
save.click(
fn=wrap_gradio_call(save_files),
_js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
|