From 087609ee181a91a523647435ffffa6288a317e2f Mon Sep 17 00:00:00 2001 From: captin411 Date: Wed, 19 Oct 2022 03:19:35 -0700 Subject: UI changes for focal point image cropping --- modules/ui.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'modules/ui.py') diff --git a/modules/ui.py b/modules/ui.py index 1ff7eb4f..b6be713b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1234,6 +1234,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images into two') + process_entropy_focus = gr.Checkbox(label='Create auto focal point crop') process_caption = gr.Checkbox(label='Use BLIP for caption') process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) @@ -1318,7 +1319,8 @@ def create_ui(wrap_gradio_gpu_call): process_flip, process_split, process_caption, - process_caption_deepbooru + process_caption_deepbooru, + process_entropy_focus ], outputs=[ ti_output, -- cgit v1.2.3 From db8ed5fe5cd6e967d12d43d96b7f83083e58626c Mon Sep 17 00:00:00 2001 From: captin411 Date: Tue, 25 Oct 2022 15:22:29 -0700 Subject: Focal crop UI elements --- modules/textual_inversion/preprocess.py | 26 +++++++++++++------------- modules/ui.py | 20 ++++++++++++++++++-- 2 files changed, 31 insertions(+), 15 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index a8c17c6f..1e4d4de8 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -13,7 +13,7 @@ if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru -def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_entropy_focus=False): +def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False): try: if process_caption: shared.interrogator.load() @@ -23,7 +23,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce db_opts[deepbooru.OPT_INCLUDE_RANKS] = False deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) - preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_entropy_focus) + preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug) finally: @@ -35,7 +35,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce -def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_entropy_focus=False): +def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False): width = process_width height = process_height src = os.path.abspath(process_src) @@ -139,27 +139,27 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre ratio = (img.height * width) / (img.width * height) inverse_xy = True - processing_option_ran = False + process_default_resize = True if process_split and ratio < 1.0 and ratio <= split_threshold: for splitted in split_pic(img, inverse_xy): save_pic(splitted, index, existing_caption=existing_caption) - processing_option_ran = True + process_default_resize = False if process_entropy_focus and img.height != img.width: autocrop_settings = autocrop.Settings( crop_width = width, crop_height = height, - face_points_weight = 0.9, - entropy_points_weight = 0.7, - corner_points_weight = 0.5, - annotate_image = False + face_points_weight = process_focal_crop_face_weight, + entropy_points_weight = process_focal_crop_entropy_weight, + corner_points_weight = process_focal_crop_edges_weight, + annotate_image = process_focal_crop_debug ) - focal = autocrop.crop_image(img, autocrop_settings) - save_pic(focal, index, existing_caption=existing_caption) - processing_option_ran = True + for focal in autocrop.crop_image(img, autocrop_settings): + save_pic(focal, index, existing_caption=existing_caption) + process_default_resize = False - if not processing_option_ran: + if process_default_resize: img = images.resize_image(1, img, width, height) save_pic(img, index, existing_caption=existing_caption) diff --git a/modules/ui.py b/modules/ui.py index 028eb4e5..95b9c703 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1260,7 +1260,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images') - process_entropy_focus = gr.Checkbox(label='Create auto focal point crop') + process_focal_crop = gr.Checkbox(label='Auto focal point crop') process_caption = gr.Checkbox(label='Use BLIP for caption') process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) @@ -1268,6 +1268,12 @@ def create_ui(wrap_gradio_gpu_call): process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05) process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05) + with gr.Row(visible=False) as process_focal_crop_row: + process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.3, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_debug = gr.Checkbox(label='Create debug image') + with gr.Row(): with gr.Column(scale=3): gr.HTML(value="") @@ -1281,6 +1287,12 @@ def create_ui(wrap_gradio_gpu_call): outputs=[process_split_extra_row], ) + process_focal_crop.change( + fn=lambda show: gr_show(show), + inputs=[process_focal_crop], + outputs=[process_focal_crop_row], + ) + with gr.Tab(label="Train"): gr.HTML(value="

Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images [wiki]

") with gr.Row(): @@ -1368,7 +1380,11 @@ def create_ui(wrap_gradio_gpu_call): process_caption_deepbooru, process_split_threshold, process_overlap_ratio, - process_entropy_focus, + process_focal_crop, + process_focal_crop_face_weight, + process_focal_crop_entropy_weight, + process_focal_crop_edges_weight, + process_focal_crop_debug, ], outputs=[ ti_output, -- cgit v1.2.3 From df0c5ea29d7f0c682ac81f184f3e482a6450d018 Mon Sep 17 00:00:00 2001 From: captin411 Date: Tue, 25 Oct 2022 17:06:59 -0700 Subject: update default weights --- modules/textual_inversion/autocrop.py | 16 ++++++++-------- modules/ui.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py index 01a92b12..9859974a 100644 --- a/modules/textual_inversion/autocrop.py +++ b/modules/textual_inversion/autocrop.py @@ -71,9 +71,9 @@ def crop_image(im, settings): return results def focal_point(im, settings): - corner_points = image_corner_points(im, settings) - entropy_points = image_entropy_points(im, settings) - face_points = image_face_points(im, settings) + corner_points = image_corner_points(im, settings) if settings.corner_points_weight > 0 else [] + entropy_points = image_entropy_points(im, settings) if settings.entropy_points_weight > 0 else [] + face_points = image_face_points(im, settings) if settings.face_points_weight > 0 else [] pois = [] @@ -144,7 +144,7 @@ def image_face_points(im, settings): settings.dnn_model_path, "", (im.width, im.height), - 0.8, # score threshold + 0.9, # score threshold 0.3, # nms threshold 5000 # keep top k before nms ) @@ -159,7 +159,7 @@ def image_face_points(im, settings): results.append( PointOfInterest( int(x + (w * 0.5)), # face focus left/right is center - int(y + (h * 0)), # face focus up/down is close to the top of the head + int(y + (h * 0.33)), # face focus up/down is close to the top of the head size = w, weight = 1/len(faces[1]) ) @@ -207,7 +207,7 @@ def image_corner_points(im, settings): np_im, maxCorners=100, qualityLevel=0.04, - minDistance=min(grayscale.width, grayscale.height)*0.03, + minDistance=min(grayscale.width, grayscale.height)*0.06, useHarrisDetector=False, ) @@ -256,8 +256,8 @@ def image_entropy_points(im, settings): def image_entropy(im): # greyscale image entropy - band = np.asarray(im.convert("L")) - # band = np.asarray(im.convert("1"), dtype=np.uint8) + # band = np.asarray(im.convert("L")) + band = np.asarray(im.convert("1"), dtype=np.uint8) hist, _ = np.histogram(band, bins=range(0, 256)) hist = hist[hist > 0] return -np.log2(hist / hist.sum()).sum() diff --git a/modules/ui.py b/modules/ui.py index 95b9c703..095200a8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1270,7 +1270,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(visible=False) as process_focal_crop_row: process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05) - process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.3, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05) process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05) process_focal_crop_debug = gr.Checkbox(label='Create debug image') -- cgit v1.2.3 From de096d0ce752c96e45508dcc7b9e84f7dbe10cca Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Tue, 25 Oct 2022 14:48:49 +0900 Subject: Weight initialization and More activation func add weight init add weight init option in create_hypernetwork fstringify hypernet info save weight initialization info for further debugging fill bias with zero for He/Xavier initialize LayerNorm with Normal fix loading weight_init --- modules/hypernetworks/hypernetwork.py | 47 ++++++++++++++++++++++++++++------- modules/hypernetworks/ui.py | 4 ++- modules/ui.py | 4 ++- 3 files changed, 44 insertions(+), 11 deletions(-) (limited to 'modules/ui.py') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index d647ea55..afbcdff8 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -5,6 +5,7 @@ import html import os import sys import traceback +import inspect import modules.textual_inversion.dataset import torch @@ -15,10 +16,12 @@ from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum +from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_ from collections import defaultdict, deque from statistics import stdev, mean + class HypernetworkModule(torch.nn.Module): multiplier = 1.0 activation_dict = { @@ -26,9 +29,12 @@ class HypernetworkModule(torch.nn.Module): "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, "swish": torch.nn.Hardswish, + "tanh": torch.nn.Tanh, + "sigmoid": torch.nn.Sigmoid, } + activation_dict.update({cls_name: cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) - def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -65,9 +71,24 @@ class HypernetworkModule(torch.nn.Module): else: for layer in self.linear: if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: - layer.weight.data.normal_(mean=0.0, std=0.01) - layer.bias.data.zero_() - + w, b = layer.weight.data, layer.bias.data + if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm: + normal_(w, mean=0.0, std=0.01) + normal_(b, mean=0.0, std=0.005) + elif weight_init == 'XavierUniform': + xavier_uniform_(w) + zeros_(b) + elif weight_init == 'XavierNormal': + xavier_normal_(w) + zeros_(b) + elif weight_init == 'KaimingUniform': + kaiming_uniform_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + elif weight_init == 'KaimingNormal': + kaiming_normal_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + else: + raise KeyError(f"Key {weight_init} is not defined as initialization!") self.to(devices.device) def fix_old_state_dict(self, state_dict): @@ -105,7 +126,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False): self.filename = None self.name = name self.layers = {} @@ -114,13 +135,14 @@ class Hypernetwork: self.sd_checkpoint_name = None self.layer_structure = layer_structure self.activation_func = activation_func + self.weight_init = weight_init self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), ) def weights(self): @@ -144,6 +166,7 @@ class Hypernetwork: state_dict['layer_structure'] = self.layer_structure state_dict['activation_func'] = self.activation_func state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['weight_initialization'] = self.weight_init state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -158,15 +181,21 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) + print(self.layer_structure) self.activation_func = state_dict.get('activation_func', None) + print(f"Activation function is {self.activation_func}") + self.weight_init = state_dict.get('weight_initialization', 'Normal') + print(f"Weight initialization is {self.weight_init}") self.add_layer_norm = state_dict.get('is_layer_norm', False) + print(f"Layer norm is set to {self.add_layer_norm}") self.use_dropout = state_dict.get('use_dropout', False) + print(f"Dropout usage is set to {self.use_dropout}" ) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout), ) self.name = state_dict.get('name', self.name) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 2b472d87..2c6c0470 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -8,8 +8,9 @@ import modules.textual_inversion.textual_inversion from modules import devices, sd_hijack, shared from modules.hypernetworks import hypernetwork +keys = list(hypernetwork.HypernetworkModule.activation_dict.keys()) -def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): +def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False): # Remove illegal characters from name. name = "".join( x for x in name if (x.isalnum() or x in "._- ")) @@ -25,6 +26,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, enable_sizes=[int(x) for x in enable_sizes], layer_structure=layer_structure, activation_func=activation_func, + weight_init=weight_init, add_layer_norm=add_layer_norm, use_dropout=use_dropout, ) diff --git a/modules/ui.py b/modules/ui.py index 03528968..8e343258 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1238,7 +1238,8 @@ def create_ui(wrap_gradio_gpu_call): new_hypernetwork_name = gr.Textbox(label="Name") new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'") - new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=["linear", "relu", "leakyrelu", "elu", "swish"]) + new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=modules.hypernetworks.ui.keys) + new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. relu-like - Kaiming, sigmoid-like - Xavier is recommended", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"]) new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization") new_hypernetwork_use_dropout = gr.Checkbox(label="Use dropout") overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork") @@ -1342,6 +1343,7 @@ def create_ui(wrap_gradio_gpu_call): overwrite_old_hypernetwork, new_hypernetwork_layer_structure, new_hypernetwork_activation_func, + new_hypernetwork_initialization_option, new_hypernetwork_add_layer_norm, new_hypernetwork_use_dropout ], -- cgit v1.2.3