Update config.json (#2)
Browse files
- Update config.json (06eb4299041c01d59d9308474b6ed27307027537)
Co-authored-by: Yu Wang <[email protected]>
- config.json +1 -18
config.json
CHANGED
|
@@ -36,17 +36,12 @@
|
|
| 36 |
},
|
| 37 |
"add_special_tokens": true,
|
| 38 |
"architectures": [
|
| 39 |
-
"
|
| 40 |
],
|
| 41 |
"attn_implementation": "flash_attention_2",
|
| 42 |
-
"cotrain_vlm": false,
|
| 43 |
"diffusion_cfg": {
|
| 44 |
"_target_": "alpamayo_r1.diffusion.flow_matching.FlowMatching",
|
| 45 |
-
"inference_guidance_weight": 3.0,
|
| 46 |
"int_method": "euler",
|
| 47 |
-
"train_ignore_guidance_rate": 0.1,
|
| 48 |
-
"train_timestep_sampler": "beta",
|
| 49 |
-
"use_classifier_free_guidance": false,
|
| 50 |
"x_dims": "???"
|
| 51 |
},
|
| 52 |
"dtype": "bfloat16",
|
|
@@ -57,29 +52,17 @@
|
|
| 57 |
"intermediate_size": 8256,
|
| 58 |
"num_attention_heads": 16
|
| 59 |
},
|
| 60 |
-
"expert_hist_traj_tokenizer_cfg": null,
|
| 61 |
"expert_non_causal_attention": true,
|
| 62 |
-
"hist_traj_embed_cfg": null,
|
| 63 |
"hist_traj_tokenizer_cfg": {
|
| 64 |
"_target_": "alpamayo_r1.models.delta_tokenizer.DeltaTrajectoryTokenizer"
|
| 65 |
},
|
| 66 |
-
"image_height": 320,
|
| 67 |
-
"image_width": 512,
|
| 68 |
-
"include_camera_ids": false,
|
| 69 |
"keep_same_dtype": true,
|
| 70 |
-
"legacy_inference_image_input_format": false,
|
| 71 |
-
"loss_weights": {
|
| 72 |
-
"future_traj": 1.0,
|
| 73 |
-
"others": 1.0
|
| 74 |
-
},
|
| 75 |
"max_pixels": 196608,
|
| 76 |
"min_pixels": 163840,
|
| 77 |
"model_dtype": "bfloat16",
|
| 78 |
"model_type": "alpamayo_r1",
|
| 79 |
-
"stop_grad_from_vlm": true,
|
| 80 |
"tokens_per_future_traj": 128,
|
| 81 |
"tokens_per_history_traj": 48,
|
| 82 |
-
"traj_loss_weight": 1.0,
|
| 83 |
"traj_token_ids": {
|
| 84 |
"future": 155685,
|
| 85 |
"future_end": 155683,
|
|
|
|
| 36 |
},
|
| 37 |
"add_special_tokens": true,
|
| 38 |
"architectures": [
|
| 39 |
+
"AlpamayoR1"
|
| 40 |
],
|
| 41 |
"attn_implementation": "flash_attention_2",
|
|
|
|
| 42 |
"diffusion_cfg": {
|
| 43 |
"_target_": "alpamayo_r1.diffusion.flow_matching.FlowMatching",
|
|
|
|
| 44 |
"int_method": "euler",
|
|
|
|
|
|
|
|
|
|
| 45 |
"x_dims": "???"
|
| 46 |
},
|
| 47 |
"dtype": "bfloat16",
|
|
|
|
| 52 |
"intermediate_size": 8256,
|
| 53 |
"num_attention_heads": 16
|
| 54 |
},
|
|
|
|
| 55 |
"expert_non_causal_attention": true,
|
|
|
|
| 56 |
"hist_traj_tokenizer_cfg": {
|
| 57 |
"_target_": "alpamayo_r1.models.delta_tokenizer.DeltaTrajectoryTokenizer"
|
| 58 |
},
|
|
|
|
|
|
|
|
|
|
| 59 |
"keep_same_dtype": true,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
"max_pixels": 196608,
|
| 61 |
"min_pixels": 163840,
|
| 62 |
"model_dtype": "bfloat16",
|
| 63 |
"model_type": "alpamayo_r1",
|
|
|
|
| 64 |
"tokens_per_future_traj": 128,
|
| 65 |
"tokens_per_history_traj": 48,
|
|
|
|
| 66 |
"traj_token_ids": {
|
| 67 |
"future": 155685,
|
| 68 |
"future_end": 155683,
|