aboutsummaryrefslogtreecommitdiffstats
path: root/v2-inference-v.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'v2-inference-v.yaml')
-rw-r--r--v2-inference-v.yaml68
1 files changed, 0 insertions, 68 deletions
diff --git a/v2-inference-v.yaml b/v2-inference-v.yaml
deleted file mode 100644
index 513cd635..00000000
--- a/v2-inference-v.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-model:
- base_learning_rate: 1.0e-4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
- params:
- parameterization: "v"
- linear_start: 0.00085
- linear_end: 0.0120
- num_timesteps_cond: 1
- log_every_t: 200
- timesteps: 1000
- first_stage_key: "jpg"
- cond_stage_key: "txt"
- image_size: 64
- channels: 4
- cond_stage_trainable: false
- conditioning_key: crossattn
- monitor: val/loss_simple_ema
- scale_factor: 0.18215
- use_ema: False # we set this to false because this is an inference only config
-
- unet_config:
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
- params:
- use_checkpoint: True
- use_fp16: True
- image_size: 32 # unused
- in_channels: 4
- out_channels: 4
- model_channels: 320
- attention_resolutions: [ 4, 2, 1 ]
- num_res_blocks: 2
- channel_mult: [ 1, 2, 4, 4 ]
- num_head_channels: 64 # need to fix for flash-attn
- use_spatial_transformer: True
- use_linear_in_transformer: True
- transformer_depth: 1
- context_dim: 1024
- legacy: False
-
- first_stage_config:
- target: ldm.models.autoencoder.AutoencoderKL
- params:
- embed_dim: 4
- monitor: val/rec_loss
- ddconfig:
- #attn_type: "vanilla-xformers"
- double_z: true
- z_channels: 4
- resolution: 256
- in_channels: 3
- out_ch: 3
- ch: 128
- ch_mult:
- - 1
- - 2
- - 4
- - 4
- num_res_blocks: 2
- attn_resolutions: []
- dropout: 0.0
- lossconfig:
- target: torch.nn.Identity
-
- cond_stage_config:
- target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
- params:
- freeze: True
- layer: "penultimate" \ No newline at end of file