diff options
Diffstat (limited to 'configs/v1-inpainting-inference.yaml')
| -rw-r--r-- | configs/v1-inpainting-inference.yaml | 70 | 
1 files changed, 70 insertions, 0 deletions
| diff --git a/configs/v1-inpainting-inference.yaml b/configs/v1-inpainting-inference.yaml new file mode 100644 index 00000000..f9eec37d --- /dev/null +++ b/configs/v1-inpainting-inference.yaml @@ -0,0 +1,70 @@ +model: +  base_learning_rate: 7.5e-05 +  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion +  params: +    linear_start: 0.00085 +    linear_end: 0.0120 +    num_timesteps_cond: 1 +    log_every_t: 200 +    timesteps: 1000 +    first_stage_key: "jpg" +    cond_stage_key: "txt" +    image_size: 64 +    channels: 4 +    cond_stage_trainable: false   # Note: different from the one we trained before +    conditioning_key: hybrid   # important +    monitor: val/loss_simple_ema +    scale_factor: 0.18215 +    finetune_keys: null + +    scheduler_config: # 10000 warmup steps +      target: ldm.lr_scheduler.LambdaLinearScheduler +      params: +        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch +        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases +        f_start: [ 1.e-6 ] +        f_max: [ 1. ] +        f_min: [ 1. ] + +    unet_config: +      target: ldm.modules.diffusionmodules.openaimodel.UNetModel +      params: +        image_size: 32 # unused +        in_channels: 9  # 4 data + 4 downscaled image + 1 mask +        out_channels: 4 +        model_channels: 320 +        attention_resolutions: [ 4, 2, 1 ] +        num_res_blocks: 2 +        channel_mult: [ 1, 2, 4, 4 ] +        num_heads: 8 +        use_spatial_transformer: True +        transformer_depth: 1 +        context_dim: 768 +        use_checkpoint: True +        legacy: False + +    first_stage_config: +      target: ldm.models.autoencoder.AutoencoderKL +      params: +        embed_dim: 4 +        monitor: val/rec_loss +        ddconfig: +          double_z: true +          z_channels: 4 +          resolution: 256 +          in_channels: 3 +          out_ch: 3 +          ch: 128 +          ch_mult: +          - 1 +          - 2 +          - 4 +          - 4 +          num_res_blocks: 2 +          attn_resolutions: [] +          dropout: 0.0 +        lossconfig: +          target: torch.nn.Identity + +    cond_stage_config: +      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder | 
