Add model files for Alpamayo R1 10b (#1)

Browse files

- Add model files from AR1-test (1834af8ee83df0f3690bfae4e9b607fd4e54a383)
- Update config.json (fa32ec79664ba72d56f478e7d7870b67c21697e6)

Co-authored-by: Yu Wang <[email protected]>

Files changed (7) hide show

config.json +134 -0
model-00001-of-00005.safetensors +3 -0
model-00002-of-00005.safetensors +3 -0
model-00003-of-00005.safetensors +3 -0
model-00004-of-00005.safetensors +3 -0
model-00005-of-00005.safetensors +3 -0
model.safetensors.index.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,134 @@

+{
+  "action_in_proj_cfg": {
+    "_target_": "alpamayo_r1.models.action_in_proj.PerWaypointActionInProjV2",
+    "hidden_size": 512,
+    "max_freq": 100.0,
+    "num_enc_layers": 2,
+    "num_fourier_feats": 20
+  },
+  "action_out_proj_cfg": {
+    "_target_": "torch.nn.Linear"
+  },
+  "action_space_cfg": {
+    "_target_": "alpamayo_r1.action_space.UnicycleAccelCurvatureActionSpace",
+    "a_lambda": 0.0001,
+    "a_ridge": 0.0001,
+    "accel_bounds": [
+      -9.8,
+      9.8
+    ],
+    "accel_mean": 0.02902694707164455,
+    "accel_std": 0.6810426736454882,
+    "curvature_bounds": [
+      -0.33,
+      0.33
+    ],
+    "curvature_mean": 0.0002692167976330542,
+    "curvature_std": 0.026148280660833106,
+    "dt": 0.1,
+    "kappa_lambda": 0.0001,
+    "kappa_ridge": 0.0001,
+    "n_waypoints": 64,
+    "theta_lambda": 1e-06,
+    "theta_ridge": 1e-08,
+    "v_lambda": 1e-06,
+    "v_ridge": 0.0001
+  },
+  "add_special_tokens": true,
+  "architectures": [
+    "ExpertModel"
+  ],
+  "attn_implementation": "flash_attention_2",
+  "cotrain_vlm": false,
+  "diffusion_cfg": {
+    "_target_": "alpamayo_r1.diffusion.flow_matching.FlowMatching",
+    "inference_guidance_weight": 3.0,
+    "int_method": "euler",
+    "train_ignore_guidance_rate": 0.1,
+    "train_timestep_sampler": "beta",
+    "use_classifier_free_guidance": false,
+    "x_dims": "???"
+  },
+  "dtype": "bfloat16",
+  "expert_cfg": {
+    "dtype": "bfloat16",
+    "head_dim": 128,
+    "hidden_size": 2048,
+    "intermediate_size": 8256,
+    "num_attention_heads": 16
+  },
+  "expert_hist_traj_tokenizer_cfg": null,
+  "expert_non_causal_attention": true,
+  "hist_traj_embed_cfg": null,
+  "hist_traj_tokenizer_cfg": {
+    "_target_": "alpamayo_r1.models.delta_tokenizer.DeltaTrajectoryTokenizer"
+  },
+  "image_height": 320,
+  "image_width": 512,
+  "include_camera_ids": false,
+  "keep_same_dtype": true,
+  "legacy_inference_image_input_format": false,
+  "loss_weights": {
+    "future_traj": 1.0,
+    "others": 1.0
+  },
+  "max_pixels": 196608,
+  "min_pixels": 163840,
+  "model_dtype": "bfloat16",
+  "model_type": "alpamayo_r1",
+  "stop_grad_from_vlm": true,
+  "tokens_per_future_traj": 128,
+  "tokens_per_history_traj": 48,
+  "traj_loss_weight": 1.0,
+  "traj_token_ids": {
+    "future": 155685,
+    "future_end": 155683,
+    "future_start": 155681,
+    "history": 155684,
+    "history_end": 155676,
+    "history_start": 155674
+  },
+  "traj_token_start_idx": 151669,
+  "traj_tokenizer_cfg": {
+    "_recursive_": false,
+    "_target_": "alpamayo_r1.action_space.discrete_action_space.DiscreteTrajectoryTokenizer",
+    "action_space_cfg": {
+      "_target_": "alpamayo_r1.action_space.UnicycleAccelCurvatureActionSpace",
+      "a_lambda": 0.0001,
+      "a_ridge": 0.0001,
+      "accel_bounds": [
+        -9.8,
+        9.8
+      ],
+      "accel_mean": 0.02902694707164455,
+      "accel_std": 0.6810426736454882,
+      "curvature_bounds": [
+        -0.33,
+        0.33
+      ],
+      "curvature_mean": 0.0002692167976330542,
+      "curvature_std": 0.026148280660833106,
+      "dt": 0.1,
+      "kappa_lambda": 0.0001,
+      "kappa_ridge": 0.0001,
+      "n_waypoints": 64,
+      "theta_lambda": 1e-06,
+      "theta_ridge": 1e-08,
+      "v_lambda": 1e-06,
+      "v_ridge": 0.0001
+    },
+    "dims_max": [
+      10,
+      10
+    ],
+    "dims_min": [
+      -10,
+      -10
+    ],
+    "num_bins": 3000
+  },
+  "traj_vocab_size": 4000,
+  "transformers_version": "4.57.1",
+  "vlm_backend": "qwenvl3",
+  "vocab_size": 155697
+}

model-00001-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6aabd8d143cff0295a60b515dfcb5ba6a5b1b5acf7cea1d6c6254ed653d35965
+size 4928204944

model-00002-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e346fcf2bf4ebd75853bd17b5744c4158cf621aba195aad7c7e2f1e484c1ae20
+size 4915963032

model-00003-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7acad9950402f002825aa41048155ea1a8a2fb5f7a00501154d52828291171c9
+size 4983071160

model-00004-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59f3ec7a6983ae2654b7ffe5d79f59d3d523b8a71a0ffc83362816886636ac0a
+size 4980341192

model-00005-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5753b1fd57638b70db4882cc8061eedfa28c2748a37414eb4032e20b1bb2e4c1
+size 2349614880

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff