From abeec4b63029c2c4151a78fc395d312113881845 Mon Sep 17 00:00:00 2001 From: captin411 Date: Wed, 19 Oct 2022 03:18:26 -0700 Subject: Add auto focal point cropping to Preprocess images This algorithm plots a bunch of points of interest on the source image and averages their locations to find a center. Most points come from OpenCV. One point comes from an entropy model. OpenCV points account for 50% of the weight and the entropy based point is the other 50%. The center of all weighted points is calculated and a bounding box is drawn as close to centered over that point as possible. --- modules/textual_inversion/preprocess.py | 151 ++++++++++++++++++++++++++++++-- 1 file changed, 146 insertions(+), 5 deletions(-) (limited to 'modules/textual_inversion/preprocess.py') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 886cf0c3..168bfb09 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -1,5 +1,7 @@ import os -from PIL import Image, ImageOps +import cv2 +import numpy as np +from PIL import Image, ImageOps, ImageDraw import platform import sys import tqdm @@ -11,7 +13,7 @@ if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru -def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): +def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False, process_entropy_focus=False): try: if process_caption: shared.interrogator.load() @@ -21,7 +23,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ db_opts[deepbooru.OPT_INCLUDE_RANKS] = False deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) - preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) + preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru, process_entropy_focus) finally: @@ -33,7 +35,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ -def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): +def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False, process_entropy_focus=False): width = process_width height = process_height src = os.path.abspath(process_src) @@ -93,6 +95,8 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro is_tall = ratio > 1.35 is_wide = ratio < 1 / 1.35 + processing_option_ran = False + if process_split and is_tall: img = img.resize((width, height * img.height // img.width)) @@ -101,6 +105,8 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro bot = img.crop((0, img.height - height, width, img.height)) save_pic(bot, index) + + processing_option_ran = True elif process_split and is_wide: img = img.resize((width * img.width // img.height, height)) @@ -109,8 +115,143 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro right = img.crop((img.width - width, 0, img.width, height)) save_pic(right, index) - else: + + processing_option_ran = True + + if process_entropy_focus and (is_tall or is_wide): + if is_tall: + img = img.resize((width, height * img.height // img.width)) + else: + img = img.resize((width * img.width // img.height, height)) + + x_focal_center, y_focal_center = image_central_focal_point(img, width, height) + + # take the focal point and turn it into crop coordinates that try to center over the focal + # point but then get adjusted back into the frame + y_half = int(height / 2) + x_half = int(width / 2) + + x1 = x_focal_center - x_half + if x1 < 0: + x1 = 0 + elif x1 + width > img.width: + x1 = img.width - width + + y1 = y_focal_center - y_half + if y1 < 0: + y1 = 0 + elif y1 + height > img.height: + y1 = img.height - height + + x2 = x1 + width + y2 = y1 + height + + crop = [x1, y1, x2, y2] + + focal = img.crop(tuple(crop)) + save_pic(focal, index) + + processing_option_ran = True + + if not processing_option_ran: img = images.resize_image(1, img, width, height) save_pic(img, index) shared.state.nextjob() + + +def image_central_focal_point(im, target_width, target_height): + focal_points = [] + + focal_points.extend( + image_focal_points(im) + ) + + fp_entropy = image_entropy_point(im, target_width, target_height) + fp_entropy['weight'] = len(focal_points) + 1 # about half of the weight to entropy + + focal_points.append(fp_entropy) + + weight = 0.0 + x = 0.0 + y = 0.0 + for focal_point in focal_points: + weight += focal_point['weight'] + x += focal_point['x'] * focal_point['weight'] + y += focal_point['y'] * focal_point['weight'] + avg_x = round(x // weight) + avg_y = round(y // weight) + + return avg_x, avg_y + + +def image_focal_points(im): + grayscale = im.convert("L") + + # naive attempt at preventing focal points from collecting at watermarks near the bottom + gd = ImageDraw.Draw(grayscale) + gd.rectangle([0, im.height*.9, im.width, im.height], fill="#999") + + np_im = np.array(grayscale) + + points = cv2.goodFeaturesToTrack( + np_im, + maxCorners=50, + qualityLevel=0.04, + minDistance=min(grayscale.width, grayscale.height)*0.05, + useHarrisDetector=False, + ) + + if points is None: + return [] + + focal_points = [] + for point in points: + x, y = point.ravel() + focal_points.append({ + 'x': x, + 'y': y, + 'weight': 1.0 + }) + + return focal_points + + +def image_entropy_point(im, crop_width, crop_height): + img = im.copy() + # just make it easier to slide the test crop with images oriented the same way + if (img.size[0] < img.size[1]): + portrait = True + img = img.rotate(90, expand=1) + + e_max = 0 + crop_current = [0, 0, crop_width, crop_height] + crop_best = crop_current + while crop_current[2] < img.size[0]: + crop = img.crop(tuple(crop_current)) + e = image_entropy(crop) + + if (e_max < e): + e_max = e + crop_best = list(crop_current) + + crop_current[0] += 4 + crop_current[2] += 4 + + x_mid = int((crop_best[2] - crop_best[0])/2) + y_mid = int((crop_best[3] - crop_best[1])/2) + + return { + 'x': x_mid, + 'y': y_mid, + 'weight': 1.0 + } + + +def image_entropy(im): + # greyscale image entropy + band = np.asarray(im.convert("L")) + hist, _ = np.histogram(band, bins=range(0, 256)) + hist = hist[hist > 0] + return -np.log2(hist / hist.sum()).sum() + -- cgit v1.2.3 From 41e3877be2c667316515c86037413763eb0ba4da Mon Sep 17 00:00:00 2001 From: captin411 Date: Wed, 19 Oct 2022 13:44:59 -0700 Subject: fix entropy point calculation --- modules/textual_inversion/preprocess.py | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'modules/textual_inversion/preprocess.py') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 168bfb09..7c1a594e 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -196,9 +196,9 @@ def image_focal_points(im): points = cv2.goodFeaturesToTrack( np_im, - maxCorners=50, + maxCorners=100, qualityLevel=0.04, - minDistance=min(grayscale.width, grayscale.height)*0.05, + minDistance=min(grayscale.width, grayscale.height)*0.07, useHarrisDetector=False, ) @@ -218,28 +218,32 @@ def image_focal_points(im): def image_entropy_point(im, crop_width, crop_height): - img = im.copy() - # just make it easier to slide the test crop with images oriented the same way - if (img.size[0] < img.size[1]): - portrait = True - img = img.rotate(90, expand=1) + landscape = im.height < im.width + portrait = im.height > im.width + if landscape: + move_idx = [0, 2] + move_max = im.size[0] + elif portrait: + move_idx = [1, 3] + move_max = im.size[1] e_max = 0 crop_current = [0, 0, crop_width, crop_height] crop_best = crop_current - while crop_current[2] < img.size[0]: - crop = img.crop(tuple(crop_current)) + while crop_current[move_idx[1]] < move_max: + crop = im.crop(tuple(crop_current)) e = image_entropy(crop) - if (e_max < e): + if (e > e_max): e_max = e crop_best = list(crop_current) - crop_current[0] += 4 - crop_current[2] += 4 + crop_current[move_idx[0]] += 4 + crop_current[move_idx[1]] += 4 + + x_mid = int(crop_best[0] + crop_width/2) + y_mid = int(crop_best[1] + crop_height/2) - x_mid = int((crop_best[2] - crop_best[0])/2) - y_mid = int((crop_best[3] - crop_best[1])/2) return { 'x': x_mid, @@ -250,7 +254,7 @@ def image_entropy_point(im, crop_width, crop_height): def image_entropy(im): # greyscale image entropy - band = np.asarray(im.convert("L")) + band = np.asarray(im.convert("1")) hist, _ = np.histogram(band, bins=range(0, 256)) hist = hist[hist > 0] return -np.log2(hist / hist.sum()).sum() -- cgit v1.2.3 From 59ed74438318af893d2cba552b0e28dbc2a9266c Mon Sep 17 00:00:00 2001 From: captin411 Date: Wed, 19 Oct 2022 17:19:02 -0700 Subject: face detection algo, configurability, reusability Try to move the crop in the direction of a face if it is present More internal configuration options for choosing weights of each of the algorithm's findings Move logic into its module --- modules/textual_inversion/autocrop.py | 216 ++++++++++++++++++++++++++++++++ modules/textual_inversion/preprocess.py | 150 +++------------------- 2 files changed, 230 insertions(+), 136 deletions(-) create mode 100644 modules/textual_inversion/autocrop.py (limited to 'modules/textual_inversion/preprocess.py') diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py new file mode 100644 index 00000000..f858a958 --- /dev/null +++ b/modules/textual_inversion/autocrop.py @@ -0,0 +1,216 @@ +import cv2 +from collections import defaultdict +from math import log, sqrt +import numpy as np +from PIL import Image, ImageDraw + +GREEN = "#0F0" +BLUE = "#00F" +RED = "#F00" + +def crop_image(im, settings): + """ Intelligently crop an image to the subject matter """ + if im.height > im.width: + im = im.resize((settings.crop_width, settings.crop_height * im.height // im.width)) + else: + im = im.resize((settings.crop_width * im.width // im.height, settings.crop_height)) + + focus = focal_point(im, settings) + + # take the focal point and turn it into crop coordinates that try to center over the focal + # point but then get adjusted back into the frame + y_half = int(settings.crop_height / 2) + x_half = int(settings.crop_width / 2) + + x1 = focus.x - x_half + if x1 < 0: + x1 = 0 + elif x1 + settings.crop_width > im.width: + x1 = im.width - settings.crop_width + + y1 = focus.y - y_half + if y1 < 0: + y1 = 0 + elif y1 + settings.crop_height > im.height: + y1 = im.height - settings.crop_height + + x2 = x1 + settings.crop_width + y2 = y1 + settings.crop_height + + crop = [x1, y1, x2, y2] + + if settings.annotate_image: + d = ImageDraw.Draw(im) + rect = list(crop) + rect[2] -= 1 + rect[3] -= 1 + d.rectangle(rect, outline=GREEN) + if settings.destop_view_image: + im.show() + + return im.crop(tuple(crop)) + +def focal_point(im, settings): + corner_points = image_corner_points(im, settings) + entropy_points = image_entropy_points(im, settings) + face_points = image_face_points(im, settings) + + total_points = len(corner_points) + len(entropy_points) + len(face_points) + + corner_weight = settings.corner_points_weight + entropy_weight = settings.entropy_points_weight + face_weight = settings.face_points_weight + + weight_pref_total = corner_weight + entropy_weight + face_weight + + # weight things + pois = [] + if weight_pref_total == 0 or total_points == 0: + return pois + + pois.extend( + [ PointOfInterest( p.x, p.y, weight=p.weight * ( (corner_weight/weight_pref_total) / (len(corner_points)/total_points) )) for p in corner_points ] + ) + pois.extend( + [ PointOfInterest( p.x, p.y, weight=p.weight * ( (entropy_weight/weight_pref_total) / (len(entropy_points)/total_points) )) for p in entropy_points ] + ) + pois.extend( + [ PointOfInterest( p.x, p.y, weight=p.weight * ( (face_weight/weight_pref_total) / (len(face_points)/total_points) )) for p in face_points ] + ) + + if settings.annotate_image: + d = ImageDraw.Draw(im) + + average_point = poi_average(pois, settings, im=im) + + if settings.annotate_image: + d.ellipse([average_point.x - 25, average_point.y - 25, average_point.x + 25, average_point.y + 25], outline=GREEN) + + return average_point + + +def image_face_points(im, settings): + np_im = np.array(im) + gray = cv2.cvtColor(np_im, cv2.COLOR_BGR2GRAY) + classifier = cv2.CascadeClassifier(f'{cv2.data.haarcascades}haarcascade_frontalface_default.xml') + + minsize = int(min(im.width, im.height) * 0.15) # at least N percent of the smallest side + faces = classifier.detectMultiScale(gray, scaleFactor=1.05, + minNeighbors=5, minSize=(minsize, minsize), flags=cv2.CASCADE_SCALE_IMAGE) + + if len(faces) == 0: + return [] + + rects = [[f[0], f[1], f[0] + f[2], f[1] + f[3]] for f in faces] + if settings.annotate_image: + for f in rects: + d = ImageDraw.Draw(im) + d.rectangle(f, outline=RED) + + return [PointOfInterest((r[0] +r[2]) // 2, (r[1] + r[3]) // 2) for r in rects] + + +def image_corner_points(im, settings): + grayscale = im.convert("L") + + # naive attempt at preventing focal points from collecting at watermarks near the bottom + gd = ImageDraw.Draw(grayscale) + gd.rectangle([0, im.height*.9, im.width, im.height], fill="#999") + + np_im = np.array(grayscale) + + points = cv2.goodFeaturesToTrack( + np_im, + maxCorners=100, + qualityLevel=0.04, + minDistance=min(grayscale.width, grayscale.height)*0.07, + useHarrisDetector=False, + ) + + if points is None: + return [] + + focal_points = [] + for point in points: + x, y = point.ravel() + focal_points.append(PointOfInterest(x, y)) + + return focal_points + + +def image_entropy_points(im, settings): + landscape = im.height < im.width + portrait = im.height > im.width + if landscape: + move_idx = [0, 2] + move_max = im.size[0] + elif portrait: + move_idx = [1, 3] + move_max = im.size[1] + else: + return [] + + e_max = 0 + crop_current = [0, 0, settings.crop_width, settings.crop_height] + crop_best = crop_current + while crop_current[move_idx[1]] < move_max: + crop = im.crop(tuple(crop_current)) + e = image_entropy(crop) + + if (e > e_max): + e_max = e + crop_best = list(crop_current) + + crop_current[move_idx[0]] += 4 + crop_current[move_idx[1]] += 4 + + x_mid = int(crop_best[0] + settings.crop_width/2) + y_mid = int(crop_best[1] + settings.crop_height/2) + + return [PointOfInterest(x_mid, y_mid)] + + +def image_entropy(im): + # greyscale image entropy + band = np.asarray(im.convert("1")) + hist, _ = np.histogram(band, bins=range(0, 256)) + hist = hist[hist > 0] + return -np.log2(hist / hist.sum()).sum() + + +def poi_average(pois, settings, im=None): + weight = 0.0 + x = 0.0 + y = 0.0 + for pois in pois: + if settings.annotate_image and im is not None: + w = 4 * 0.5 * sqrt(pois.weight) + d = ImageDraw.Draw(im) + d.ellipse([ + pois.x - w, pois.y - w, + pois.x + w, pois.y + w ], fill=BLUE) + weight += pois.weight + x += pois.x * pois.weight + y += pois.y * pois.weight + avg_x = round(x / weight) + avg_y = round(y / weight) + + return PointOfInterest(avg_x, avg_y) + + +class PointOfInterest: + def __init__(self, x, y, weight=1.0): + self.x = x + self.y = y + self.weight = weight + + +class Settings: + def __init__(self, crop_width=512, crop_height=512, corner_points_weight=0.5, entropy_points_weight=0.5, face_points_weight=0.5, annotate_image=False): + self.crop_width = crop_width + self.crop_height = crop_height + self.corner_points_weight = corner_points_weight + self.entropy_points_weight = entropy_points_weight + self.face_points_weight = entropy_points_weight + self.annotate_image = annotate_image + self.destop_view_image = False \ No newline at end of file diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 7c1a594e..0c79f012 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -1,7 +1,5 @@ import os -import cv2 -import numpy as np -from PIL import Image, ImageOps, ImageDraw +from PIL import Image, ImageOps import platform import sys import tqdm @@ -9,6 +7,7 @@ import time from modules import shared, images from modules.shared import opts, cmd_opts +from modules.textual_inversion import autocrop if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru @@ -80,6 +79,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro if process_flip: save_pic_with_caption(ImageOps.mirror(image), index) + for index, imagefile in enumerate(tqdm.tqdm(files)): subindex = [0] filename = os.path.join(src, imagefile) @@ -118,37 +118,16 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro processing_option_ran = True - if process_entropy_focus and (is_tall or is_wide): - if is_tall: - img = img.resize((width, height * img.height // img.width)) - else: - img = img.resize((width * img.width // img.height, height)) - - x_focal_center, y_focal_center = image_central_focal_point(img, width, height) - - # take the focal point and turn it into crop coordinates that try to center over the focal - # point but then get adjusted back into the frame - y_half = int(height / 2) - x_half = int(width / 2) - - x1 = x_focal_center - x_half - if x1 < 0: - x1 = 0 - elif x1 + width > img.width: - x1 = img.width - width - - y1 = y_focal_center - y_half - if y1 < 0: - y1 = 0 - elif y1 + height > img.height: - y1 = img.height - height - - x2 = x1 + width - y2 = y1 + height - - crop = [x1, y1, x2, y2] - - focal = img.crop(tuple(crop)) + if process_entropy_focus and img.height != img.width: + autocrop_settings = autocrop.Settings( + crop_width = width, + crop_height = height, + face_points_weight = 0.9, + entropy_points_weight = 0.7, + corner_points_weight = 0.5, + annotate_image = False + ) + focal = autocrop.crop_image(img, autocrop_settings) save_pic(focal, index) processing_option_ran = True @@ -157,105 +136,4 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro img = images.resize_image(1, img, width, height) save_pic(img, index) - shared.state.nextjob() - - -def image_central_focal_point(im, target_width, target_height): - focal_points = [] - - focal_points.extend( - image_focal_points(im) - ) - - fp_entropy = image_entropy_point(im, target_width, target_height) - fp_entropy['weight'] = len(focal_points) + 1 # about half of the weight to entropy - - focal_points.append(fp_entropy) - - weight = 0.0 - x = 0.0 - y = 0.0 - for focal_point in focal_points: - weight += focal_point['weight'] - x += focal_point['x'] * focal_point['weight'] - y += focal_point['y'] * focal_point['weight'] - avg_x = round(x // weight) - avg_y = round(y // weight) - - return avg_x, avg_y - - -def image_focal_points(im): - grayscale = im.convert("L") - - # naive attempt at preventing focal points from collecting at watermarks near the bottom - gd = ImageDraw.Draw(grayscale) - gd.rectangle([0, im.height*.9, im.width, im.height], fill="#999") - - np_im = np.array(grayscale) - - points = cv2.goodFeaturesToTrack( - np_im, - maxCorners=100, - qualityLevel=0.04, - minDistance=min(grayscale.width, grayscale.height)*0.07, - useHarrisDetector=False, - ) - - if points is None: - return [] - - focal_points = [] - for point in points: - x, y = point.ravel() - focal_points.append({ - 'x': x, - 'y': y, - 'weight': 1.0 - }) - - return focal_points - - -def image_entropy_point(im, crop_width, crop_height): - landscape = im.height < im.width - portrait = im.height > im.width - if landscape: - move_idx = [0, 2] - move_max = im.size[0] - elif portrait: - move_idx = [1, 3] - move_max = im.size[1] - - e_max = 0 - crop_current = [0, 0, crop_width, crop_height] - crop_best = crop_current - while crop_current[move_idx[1]] < move_max: - crop = im.crop(tuple(crop_current)) - e = image_entropy(crop) - - if (e > e_max): - e_max = e - crop_best = list(crop_current) - - crop_current[move_idx[0]] += 4 - crop_current[move_idx[1]] += 4 - - x_mid = int(crop_best[0] + crop_width/2) - y_mid = int(crop_best[1] + crop_height/2) - - - return { - 'x': x_mid, - 'y': y_mid, - 'weight': 1.0 - } - - -def image_entropy(im): - # greyscale image entropy - band = np.asarray(im.convert("1")) - hist, _ = np.histogram(band, bins=range(0, 256)) - hist = hist[hist > 0] - return -np.log2(hist / hist.sum()).sum() - + shared.state.nextjob() \ No newline at end of file -- cgit v1.2.3 From db8ed5fe5cd6e967d12d43d96b7f83083e58626c Mon Sep 17 00:00:00 2001 From: captin411 Date: Tue, 25 Oct 2022 15:22:29 -0700 Subject: Focal crop UI elements --- modules/textual_inversion/preprocess.py | 26 +++++++++++++------------- modules/ui.py | 20 ++++++++++++++++++-- 2 files changed, 31 insertions(+), 15 deletions(-) (limited to 'modules/textual_inversion/preprocess.py') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index a8c17c6f..1e4d4de8 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -13,7 +13,7 @@ if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru -def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_entropy_focus=False): +def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False): try: if process_caption: shared.interrogator.load() @@ -23,7 +23,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce db_opts[deepbooru.OPT_INCLUDE_RANKS] = False deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) - preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_entropy_focus) + preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug) finally: @@ -35,7 +35,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce -def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_entropy_focus=False): +def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False): width = process_width height = process_height src = os.path.abspath(process_src) @@ -139,27 +139,27 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre ratio = (img.height * width) / (img.width * height) inverse_xy = True - processing_option_ran = False + process_default_resize = True if process_split and ratio < 1.0 and ratio <= split_threshold: for splitted in split_pic(img, inverse_xy): save_pic(splitted, index, existing_caption=existing_caption) - processing_option_ran = True + process_default_resize = False if process_entropy_focus and img.height != img.width: autocrop_settings = autocrop.Settings( crop_width = width, crop_height = height, - face_points_weight = 0.9, - entropy_points_weight = 0.7, - corner_points_weight = 0.5, - annotate_image = False + face_points_weight = process_focal_crop_face_weight, + entropy_points_weight = process_focal_crop_entropy_weight, + corner_points_weight = process_focal_crop_edges_weight, + annotate_image = process_focal_crop_debug ) - focal = autocrop.crop_image(img, autocrop_settings) - save_pic(focal, index, existing_caption=existing_caption) - processing_option_ran = True + for focal in autocrop.crop_image(img, autocrop_settings): + save_pic(focal, index, existing_caption=existing_caption) + process_default_resize = False - if not processing_option_ran: + if process_default_resize: img = images.resize_image(1, img, width, height) save_pic(img, index, existing_caption=existing_caption) diff --git a/modules/ui.py b/modules/ui.py index 028eb4e5..95b9c703 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1260,7 +1260,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images') - process_entropy_focus = gr.Checkbox(label='Create auto focal point crop') + process_focal_crop = gr.Checkbox(label='Auto focal point crop') process_caption = gr.Checkbox(label='Use BLIP for caption') process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) @@ -1268,6 +1268,12 @@ def create_ui(wrap_gradio_gpu_call): process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05) process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05) + with gr.Row(visible=False) as process_focal_crop_row: + process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.3, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05) + process_focal_crop_debug = gr.Checkbox(label='Create debug image') + with gr.Row(): with gr.Column(scale=3): gr.HTML(value="") @@ -1281,6 +1287,12 @@ def create_ui(wrap_gradio_gpu_call): outputs=[process_split_extra_row], ) + process_focal_crop.change( + fn=lambda show: gr_show(show), + inputs=[process_focal_crop], + outputs=[process_focal_crop_row], + ) + with gr.Tab(label="Train"): gr.HTML(value="

Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images [wiki]

") with gr.Row(): @@ -1368,7 +1380,11 @@ def create_ui(wrap_gradio_gpu_call): process_caption_deepbooru, process_split_threshold, process_overlap_ratio, - process_entropy_focus, + process_focal_crop, + process_focal_crop_face_weight, + process_focal_crop_entropy_weight, + process_focal_crop_edges_weight, + process_focal_crop_debug, ], outputs=[ ti_output, -- cgit v1.2.3 From 54f0c1482427a5b3f2248b97be55878e742cbcb1 Mon Sep 17 00:00:00 2001 From: captin411 Date: Tue, 25 Oct 2022 16:14:13 -0700 Subject: download better face detection module dynamically --- modules/textual_inversion/autocrop.py | 20 ++++++++++++++++++++ modules/textual_inversion/preprocess.py | 13 +++++++++++-- requirements.txt | 2 ++ 3 files changed, 33 insertions(+), 2 deletions(-) (limited to 'modules/textual_inversion/preprocess.py') diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py index caaf18c8..01a92b12 100644 --- a/modules/textual_inversion/autocrop.py +++ b/modules/textual_inversion/autocrop.py @@ -1,4 +1,5 @@ import cv2 +import requests import os from collections import defaultdict from math import log, sqrt @@ -293,6 +294,25 @@ def is_square(w, h): return w == h +def download_and_cache_models(dirname): + download_url = 'https://github.com/opencv/opencv_zoo/blob/91fb0290f50896f38a0ab1e558b74b16bc009428/models/face_detection_yunet/face_detection_yunet_2022mar.onnx?raw=true' + model_file_name = 'face_detection_yunet.onnx' + + if not os.path.exists(dirname): + os.makedirs(dirname) + + cache_file = os.path.join(dirname, model_file_name) + if not os.path.exists(cache_file): + print(f"downloading face detection model from '{download_url}' to '{cache_file}'") + response = requests.get(download_url) + with open(cache_file, "wb") as f: + f.write(response.content) + + if os.path.exists(cache_file): + return cache_file + return None + + class PointOfInterest: def __init__(self, x, y, weight=1.0, size=10): self.x = x diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 1e4d4de8..e13b1894 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,6 +7,7 @@ import tqdm import time from modules import shared, images +from modules.paths import models_path from modules.shared import opts, cmd_opts from modules.textual_inversion import autocrop if cmd_opts.deepdanbooru: @@ -146,14 +147,22 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre save_pic(splitted, index, existing_caption=existing_caption) process_default_resize = False - if process_entropy_focus and img.height != img.width: + if process_focal_crop and img.height != img.width: + + dnn_model_path = None + try: + dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv")) + except Exception as e: + print("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", e) + autocrop_settings = autocrop.Settings( crop_width = width, crop_height = height, face_points_weight = process_focal_crop_face_weight, entropy_points_weight = process_focal_crop_entropy_weight, corner_points_weight = process_focal_crop_edges_weight, - annotate_image = process_focal_crop_debug + annotate_image = process_focal_crop_debug, + dnn_model_path = dnn_model_path, ) for focal in autocrop.crop_image(img, autocrop_settings): save_pic(focal, index, existing_caption=existing_caption) diff --git a/requirements.txt b/requirements.txt index da1969cf..75b37c4f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,8 @@ gradio==3.5 invisible-watermark numpy omegaconf +opencv-python +requests piexif Pillow pytorch_lightning -- cgit v1.2.3