From 75336dfc84cae280036bc52a6805eb10d9ae30ba Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:38:52 +0800 Subject: add TAESD for i2i and t2i --- modules/sd_samplers_common.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'modules/sd_samplers_common.py') diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 5deda761..5a45e8eb 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -23,19 +23,29 @@ def setup_img2img_steps(p, steps=None): approximation_indexes = {"Full": 0, "Approx NN": 1, "Approx cheap": 2, "TAESD": 3} -def single_sample_to_image(sample, approximation=None): +def samples_to_images_tensor(sample, approximation=None, model=None): + '''latents -> images [-1, 1]''' if approximation is None: approximation = approximation_indexes.get(opts.show_progress_type, 0) if approximation == 2: - x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5 + x_sample = sd_vae_approx.cheap_approximation(sample) elif approximation == 1: - x_sample = sd_vae_approx.model()(sample.to(devices.device, devices.dtype).unsqueeze(0))[0].detach() * 0.5 + 0.5 + x_sample = sd_vae_approx.model()(sample.to(devices.device, devices.dtype)).detach() elif approximation == 3: x_sample = sample * 1.5 - x_sample = sd_vae_taesd.model()(x_sample.to(devices.device, devices.dtype).unsqueeze(0))[0].detach() + x_sample = sd_vae_taesd.decoder_model()(x_sample.to(devices.device, devices.dtype)).detach() + x_sample = x_sample * 2 - 1 else: - x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5 + if model is None: + model = shared.sd_model + x_sample = model.decode_first_stage(sample) + + return x_sample + + +def single_sample_to_image(sample, approximation=None): + x_sample = samples_to_images_tensor(sample.unsqueeze(0), approximation)[0] * 0.5 + 0.5 x_sample = torch.clamp(x_sample, min=0.0, max=1.0) x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) @@ -52,6 +62,24 @@ def samples_to_image_grid(samples, approximation=None): return images.image_grid([single_sample_to_image(sample, approximation) for sample in samples]) +def images_tensor_to_samples(image, approximation=None, model=None): + '''image[0, 1] -> latent''' + if approximation is None: + approximation = approximation_indexes.get(opts.sd_vae_encode_method, 0) + + if approximation == 3: + image = image.to(devices.device, devices.dtype) + x_latent = sd_vae_taesd.encoder_model()(image) / 1.5 + else: + if model is None: + model = shared.sd_model + image = image.to(shared.device, dtype=devices.dtype_vae) + image = image * 2 - 1 + x_latent = model.get_first_stage_encoding(model.encode_first_stage(image)) + + return x_latent + + def store_latent(decoded): state.current_latent = decoded -- cgit v1.2.3 From c134a480164bef017cd4b33fae57a31a86556beb Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:40:20 +0800 Subject: Fix code style --- modules/sd_samplers_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/sd_samplers_common.py') diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 5a45e8eb..d444cac1 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -2,7 +2,7 @@ from collections import namedtuple import numpy as np import torch from PIL import Image -from modules import devices, processing, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared +from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared from modules.shared import opts, state SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options']) @@ -40,7 +40,7 @@ def samples_to_images_tensor(sample, approximation=None, model=None): if model is None: model = shared.sd_model x_sample = model.decode_first_stage(sample) - + return x_sample -- cgit v1.2.3 From 1f6bfdea80f58f292aeebb9a001689a118d71c01 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 4 Aug 2023 14:38:52 +0800 Subject: move the modified decode into smapler_common --- modules/sd_samplers_common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/sd_samplers_common.py') diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 2cfa4ac6..7269514f 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -55,9 +55,9 @@ def single_sample_to_image(sample, approximation=None): def decode_first_stage(model, x): - x = model.decode_first_stage(x.to(devices.dtype_vae)) - - return x + x = x.to(devices.dtype_vae) + approx_index = approximation_indexes.get(opts.sd_vae_decode_method, 0) + return samples_to_images_tensor(x, approx_index, model) def sample_to_image(samples, index=0, approximation=None): -- cgit v1.2.3 From 094c416a801b16c7d8e1944e2e9fae2c9e98bf12 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 4 Aug 2023 17:53:16 +0800 Subject: change all encode --- modules/processing.py | 14 ++++++-------- modules/sd_samplers_common.py | 2 +- run.ps1 | 1 + run_local.ps1 | 3 +++ update.ps1 | 1 + 5 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 run.ps1 create mode 100644 run_local.ps1 create mode 100644 update.ps1 (limited to 'modules/sd_samplers_common.py') diff --git a/modules/processing.py b/modules/processing.py index aae39866..544667a4 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -16,6 +16,7 @@ from typing import Any, Dict, List import modules.sd_hijack from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors from modules.sd_hijack import model_hijack +from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes from modules.shared import opts, cmd_opts, state import modules.shared as shared import modules.paths as paths @@ -30,7 +31,6 @@ from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion from einops import repeat, rearrange from blendmodes.blend import blendLayers, BlendType -decode_first_stage = sd_samplers_common.decode_first_stage # some of those options should not be changed at all because they would break the model, so I removed them from options. opt_C = 4 @@ -84,7 +84,7 @@ def txt2img_image_conditioning(sd_model, x, width, height): # The "masked-image" in this case will just be all zeros since the entire image is masked. image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device) - image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning)) + image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method)) # Add the fake full 1s mask to the first dimension. image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) @@ -203,7 +203,7 @@ class StableDiffusionProcessing: midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device) midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size) - conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image)) + conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) conditioning = torch.nn.functional.interpolate( self.sd_model.depth_model(midas_in), size=conditioning_image.shape[2:], @@ -216,7 +216,7 @@ class StableDiffusionProcessing: return conditioning def edit_image_conditioning(self, source_image): - conditioning_image = self.sd_model.encode_first_stage(source_image).mode() + conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) return conditioning_image @@ -255,7 +255,7 @@ class StableDiffusionProcessing: ) # Encode the new masked image using first stage of network. - conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) + conditioning_image = images_tensor_to_samples(conditioning_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) # Create the concatenated conditioning tensor to be fed to `c_concat` conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=latent_image.shape[-2:]) @@ -1099,9 +1099,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): decoded_samples = torch.from_numpy(np.array(batch_images)) decoded_samples = decoded_samples.to(shared.device) - decoded_samples = 2. * decoded_samples - 1. - samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples)) + samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method)) image_conditioning = self.img2img_image_conditioning(decoded_samples, samples) @@ -1339,7 +1338,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less") image = torch.from_numpy(batch_images) - from modules.sd_samplers_common import images_tensor_to_samples, approximation_indexes self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model) devices.torch_gc() diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 7269514f..42a29fc9 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -75,7 +75,7 @@ def images_tensor_to_samples(image, approximation=None, model=None): if approximation == 3: image = image.to(devices.device, devices.dtype) - x_latent = sd_vae_taesd.encoder_model()(image) / 1.5 + x_latent = sd_vae_taesd.encoder_model()(image) else: if model is None: model = shared.sd_model diff --git a/run.ps1 b/run.ps1 new file mode 100644 index 00000000..82c1660b --- /dev/null +++ b/run.ps1 @@ -0,0 +1 @@ +.\venv\Scripts\accelerate-launch.exe --num_cpu_threads_per_process=6 --api .\launch.py --listen --port 17415 --xformers --opt-channelslast \ No newline at end of file diff --git a/run_local.ps1 b/run_local.ps1 new file mode 100644 index 00000000..e2ac43db --- /dev/null +++ b/run_local.ps1 @@ -0,0 +1,3 @@ +.\venv\Scripts\Activate.ps1 +python .\launch.py --xformers --opt-channelslast --api +. $PSCommandPath \ No newline at end of file diff --git a/update.ps1 b/update.ps1 new file mode 100644 index 00000000..9960bead --- /dev/null +++ b/update.ps1 @@ -0,0 +1 @@ +git stash push && git pull --rebase && git stash pop \ No newline at end of file -- cgit v1.2.3 From 6346d8eeaa17ba0f7e41618908519f6e9bfe07e0 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 4 Aug 2023 17:53:30 +0800 Subject: Revert "change all encode" This reverts commit 094c416a801b16c7d8e1944e2e9fae2c9e98bf12. --- modules/processing.py | 14 ++++++++------ modules/sd_samplers_common.py | 2 +- run.ps1 | 1 - run_local.ps1 | 3 --- update.ps1 | 1 - 5 files changed, 9 insertions(+), 12 deletions(-) delete mode 100644 run.ps1 delete mode 100644 run_local.ps1 delete mode 100644 update.ps1 (limited to 'modules/sd_samplers_common.py') diff --git a/modules/processing.py b/modules/processing.py index 544667a4..aae39866 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -16,7 +16,6 @@ from typing import Any, Dict, List import modules.sd_hijack from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors from modules.sd_hijack import model_hijack -from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes from modules.shared import opts, cmd_opts, state import modules.shared as shared import modules.paths as paths @@ -31,6 +30,7 @@ from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion from einops import repeat, rearrange from blendmodes.blend import blendLayers, BlendType +decode_first_stage = sd_samplers_common.decode_first_stage # some of those options should not be changed at all because they would break the model, so I removed them from options. opt_C = 4 @@ -84,7 +84,7 @@ def txt2img_image_conditioning(sd_model, x, width, height): # The "masked-image" in this case will just be all zeros since the entire image is masked. image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device) - image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method)) + image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning)) # Add the fake full 1s mask to the first dimension. image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) @@ -203,7 +203,7 @@ class StableDiffusionProcessing: midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device) midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size) - conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image)) conditioning = torch.nn.functional.interpolate( self.sd_model.depth_model(midas_in), size=conditioning_image.shape[2:], @@ -216,7 +216,7 @@ class StableDiffusionProcessing: return conditioning def edit_image_conditioning(self, source_image): - conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) + conditioning_image = self.sd_model.encode_first_stage(source_image).mode() return conditioning_image @@ -255,7 +255,7 @@ class StableDiffusionProcessing: ) # Encode the new masked image using first stage of network. - conditioning_image = images_tensor_to_samples(conditioning_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method)) + conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image)) # Create the concatenated conditioning tensor to be fed to `c_concat` conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=latent_image.shape[-2:]) @@ -1099,8 +1099,9 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): decoded_samples = torch.from_numpy(np.array(batch_images)) decoded_samples = decoded_samples.to(shared.device) + decoded_samples = 2. * decoded_samples - 1. - samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method)) + samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples)) image_conditioning = self.img2img_image_conditioning(decoded_samples, samples) @@ -1338,6 +1339,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less") image = torch.from_numpy(batch_images) + from modules.sd_samplers_common import images_tensor_to_samples, approximation_indexes self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model) devices.torch_gc() diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 42a29fc9..7269514f 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -75,7 +75,7 @@ def images_tensor_to_samples(image, approximation=None, model=None): if approximation == 3: image = image.to(devices.device, devices.dtype) - x_latent = sd_vae_taesd.encoder_model()(image) + x_latent = sd_vae_taesd.encoder_model()(image) / 1.5 else: if model is None: model = shared.sd_model diff --git a/run.ps1 b/run.ps1 deleted file mode 100644 index 82c1660b..00000000 --- a/run.ps1 +++ /dev/null @@ -1 +0,0 @@ -.\venv\Scripts\accelerate-launch.exe --num_cpu_threads_per_process=6 --api .\launch.py --listen --port 17415 --xformers --opt-channelslast \ No newline at end of file diff --git a/run_local.ps1 b/run_local.ps1 deleted file mode 100644 index e2ac43db..00000000 --- a/run_local.ps1 +++ /dev/null @@ -1,3 +0,0 @@ -.\venv\Scripts\Activate.ps1 -python .\launch.py --xformers --opt-channelslast --api -. $PSCommandPath \ No newline at end of file diff --git a/update.ps1 b/update.ps1 deleted file mode 100644 index 9960bead..00000000 --- a/update.ps1 +++ /dev/null @@ -1 +0,0 @@ -git stash push && git pull --rebase && git stash pop \ No newline at end of file -- cgit v1.2.3 From 073342c8878adc208be1eaab2705ba865d7b3ea1 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 4 Aug 2023 17:55:52 +0800 Subject: remove noneed scale --- modules/sd_samplers_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/sd_samplers_common.py') diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 7269514f..42a29fc9 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -75,7 +75,7 @@ def images_tensor_to_samples(image, approximation=None, model=None): if approximation == 3: image = image.to(devices.device, devices.dtype) - x_latent = sd_vae_taesd.encoder_model()(image) / 1.5 + x_latent = sd_vae_taesd.encoder_model()(image) else: if model is None: model = shared.sd_model -- cgit v1.2.3