| model: | |
| learning_rate: 0.0003 | |
| target: dalle_main.DALLE_trainer | |
| params: | |
| ckpt_path: model.ckpt | |
| condition_model_path: | |
| condition_config_path: nucleus_vqgan.yaml | |
| vqgan_model_path: | |
| vqgan_config_path: threshold_vqgan.yaml | |
| image_key: threshold | |
| num_images: 2 | |
| dim: 768 | |
| num_text_tokens: 30 | |
| text_seq_len: 1000 | |
| depth: 32 | |
| heads: 16 | |
| dim_head: 64 | |
| attn_dropout: 0.1 | |
| ff_dropout: 0.1 | |
| sparse_attn: false | |
| attn_types: full | |
| rotary_emb: true | |
| loss_type: logits | |
| fixed_embedding: true | |
| text_embedding: bert | |