smartrichard committed on Feb 27, 2025

Commit

28e21c6

verified ·

1 Parent(s): 156b75f

Upload 65 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +4 -0
README.md +67 -0
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
added_tokens.json +24 -0
all_results.json +8 -0
checkpoint-1876/README.md +202 -0
checkpoint-1876/adapter_config.json +34 -0
checkpoint-1876/adapter_model.safetensors +3 -0
checkpoint-1876/added_tokens.json +24 -0
checkpoint-1876/global_step1875/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
checkpoint-1876/global_step1875/mp_rank_00_model_states.pt +3 -0
checkpoint-1876/latest +1 -0
checkpoint-1876/merges.txt +0 -0
checkpoint-1876/rng_state.pth +3 -0
checkpoint-1876/scheduler.pt +3 -0
checkpoint-1876/special_tokens_map.json +31 -0
checkpoint-1876/tokenizer.json +3 -0
checkpoint-1876/tokenizer_config.json +209 -0
checkpoint-1876/trainer_state.json +2464 -0
checkpoint-1876/training_args.bin +3 -0
checkpoint-1876/vocab.json +0 -0
checkpoint-1876/zero_to_fp32.py +674 -0
checkpoint-2811/README.md +202 -0
checkpoint-2811/adapter_config.json +34 -0
checkpoint-2811/adapter_model.safetensors +3 -0
checkpoint-2811/added_tokens.json +24 -0
checkpoint-2811/global_step2810/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
checkpoint-2811/global_step2810/mp_rank_00_model_states.pt +3 -0
checkpoint-2811/latest +1 -0
checkpoint-2811/merges.txt +0 -0
checkpoint-2811/rng_state.pth +3 -0
checkpoint-2811/scheduler.pt +3 -0
checkpoint-2811/special_tokens_map.json +31 -0
checkpoint-2811/tokenizer.json +3 -0
checkpoint-2811/tokenizer_config.json +209 -0
checkpoint-2811/trainer_state.json +0 -0
checkpoint-2811/training_args.bin +3 -0
checkpoint-2811/vocab.json +0 -0
checkpoint-2811/zero_to_fp32.py +674 -0
checkpoint-938/README.md +202 -0
checkpoint-938/adapter_config.json +34 -0
checkpoint-938/adapter_model.safetensors +3 -0
checkpoint-938/added_tokens.json +24 -0
checkpoint-938/global_step937/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
checkpoint-938/global_step937/mp_rank_00_model_states.pt +3 -0
checkpoint-938/latest +1 -0
checkpoint-938/merges.txt +0 -0
checkpoint-938/rng_state.pth +3 -0
checkpoint-938/scheduler.pt +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-1876/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-2811/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-938/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,67 @@

+---
+base_model: Qwen/Qwen2.5-1.5B
+datasets: xiaodongguaAIGC/X-R1-7500
+library_name: transformers
+tags:
+- generated_from_trainer
+- X-R1
+licence: license
+---
+# Model Card for None
+This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B](https://huggingface.co/Qwen/Qwen2.5-1.5B) on the [xiaodongguaAIGC/X-R1-7500](https://huggingface.co/datasets/xiaodongguaAIGC/X-R1-7500) dataset.
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="None", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/smartrichard_team1/huggingface/runs/rx351n7r)
+This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
+### Framework versions
+- TRL: 0.15.0
+- Transformers: 4.48.2
+- Pytorch: 2.5.1
+- Datasets: 3.3.2
+- Tokenizers: 0.21.0
+## Citations
+Cite GRPO as:
+```bibtex
+@article{zhihong2024deepseekmath,
+    title        = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
+    author       = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
+    year         = 2024,
+    eprint       = {arXiv:2402.03300},
+}
+```
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-1.5B",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "embed_tokens",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0e2d12773cde23612f66f956756bfff79b5088a590085701d068e152e8b9f0d
+size 488520640

added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 0.0,
+    "train_loss": 0.21294908598650353,
+    "train_runtime": 95825.8938,
+    "train_samples": 7500,
+    "train_samples_per_second": 0.235,
+    "train_steps_per_second": 0.029
+}

checkpoint-1876/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Qwen/Qwen2.5-1.5B
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-1876/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-1.5B",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "embed_tokens",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-1876/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8e46761f61f3bfddbd76a744c1714b7fdc6ee2a8fdc7e7a9a602efe22934a56
+size 488520640

checkpoint-1876/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-1876/global_step1875/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d4389c6c361a47aeae8b4f2e6246e904a7f0364e62f83400a8972e88f8c36db
+size 130520624

checkpoint-1876/global_step1875/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9db778bee26f573f90d8b34629fd5b990bcde4b1922cc2210ee73ed40b36e4c
+size 488645432

checkpoint-1876/latest ADDED Viewed

	@@ -0,0 +1 @@


+global_step1875

checkpoint-1876/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1876/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a7836d60f20134f3d9313f7612d26f0024f4c05fe0ccd1e58a97556452c2ebb
+size 14244

checkpoint-1876/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3499db549e30a7e2b1735bafe664c530646f254a8a7c3ec3b6b3d3c9d1138a84
+size 1064

checkpoint-1876/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-1876/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
+size 11422063

checkpoint-1876/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,209 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- 
'<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

checkpoint-1876/trainer_state.json ADDED Viewed

	@@ -0,0 +1,2464 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 10,
+  "global_step": 1876,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "completion_length": 121.971875,
+      "epoch": 0.010666666666666666,
+      "grad_norm": 0.156667098402977,
+      "kl": 2.0313262939453126e-05,
+      "learning_rate": 1.0638297872340426e-05,
+      "loss": 0.001,
+      "reward": 0.0125,
+      "reward_std": 0.025,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.003125,
+      "step": 10
+    },
+    {
+      "completion_length": 122.521875,
+      "epoch": 0.021333333333333333,
+      "grad_norm": 0.0012713409960269928,
+      "kl": 0.00021836161613464355,
+      "learning_rate": 2.1276595744680852e-05,
+      "loss": 0.0051,
+      "reward": 0.015625,
+      "reward_std": 0.025966878235340118,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.0,
+      "step": 20
+    },
+    {
+      "completion_length": 117.5125,
+      "epoch": 0.032,
+      "grad_norm": 0.002654253738000989,
+      "kl": 0.0003068089485168457,
+      "learning_rate": 3.1914893617021275e-05,
+      "loss": -0.0002,
+      "reward": 0.00625,
+      "reward_std": 0.007216878235340118,
+      "rewards/accuracy_reward": 0.00625,
+      "rewards/format_reward": 0.0,
+      "step": 30
+    },
+    {
+      "completion_length": 118.871875,
+      "epoch": 0.042666666666666665,
+      "grad_norm": 0.00353299081325531,
+      "kl": 0.000412750244140625,
+      "learning_rate": 4.2553191489361704e-05,
+      "loss": 0.0055,
+      "reward": 0.009375,
+      "reward_std": 0.01875,
+      "rewards/accuracy_reward": 0.00625,
+      "rewards/format_reward": 0.003125,
+      "step": 40
+    },
+    {
+      "completion_length": 121.046875,
+      "epoch": 0.05333333333333334,
+      "grad_norm": 0.003619612194597721,
+      "kl": 0.0004070043563842773,
+      "learning_rate": 5.319148936170213e-05,
+      "loss": 0.0,
+      "reward": 0.0,
+      "reward_std": 0.0,
+      "rewards/accuracy_reward": 0.0,
+      "rewards/format_reward": 0.0,
+      "step": 50
+    },
+    {
+      "completion_length": 119.4375,
+      "epoch": 0.064,
+      "grad_norm": 0.11174867302179337,
+      "kl": 0.00045168399810791016,
+      "learning_rate": 6.382978723404255e-05,
+      "loss": 0.0064,
+      "reward": 0.01875,
+      "reward_std": 0.0375,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.003125,
+      "step": 60
+    },
+    {
+      "completion_length": 119.05625,
+      "epoch": 0.07466666666666667,
+      "grad_norm": 0.006828859448432922,
+      "kl": 0.0011888980865478516,
+      "learning_rate": 7.446808510638297e-05,
+      "loss": 0.0012,
+      "reward": 0.0125,
+      "reward_std": 0.025,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.0,
+      "step": 70
+    },
+    {
+      "completion_length": 120.28125,
+      "epoch": 0.08533333333333333,
+      "grad_norm": 0.0064537739381194115,
+      "kl": 0.0019659996032714844,
+      "learning_rate": 8.510638297872341e-05,
+      "loss": 0.0028,
+      "reward": 0.0125,
+      "reward_std": 0.025,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.0,
+      "step": 80
+    },
+    {
+      "completion_length": 117.559375,
+      "epoch": 0.096,
+      "grad_norm": 0.09068689495325089,
+      "kl": 0.0025023460388183595,
+      "learning_rate": 9.574468085106382e-05,
+      "loss": 0.003,
+      "reward": 0.021875,
+      "reward_std": 0.04375,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.003125,
+      "step": 90
+    },
+    {
+      "completion_length": 117.68125,
+      "epoch": 0.10666666666666667,
+      "grad_norm": 0.16541939973831177,
+      "kl": 0.00291900634765625,
+      "learning_rate": 0.00010638297872340425,
+      "loss": 0.0008,
+      "reward": 0.021875,
+      "reward_std": 0.03125,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.0,
+      "step": 100
+    },
+    {
+      "completion_length": 116.971875,
+      "epoch": 0.11733333333333333,
+      "grad_norm": 0.07206544280052185,
+      "kl": 0.0038990020751953126,
+      "learning_rate": 0.00011702127659574467,
+      "loss": 0.0026,
+      "reward": 0.015625,
+      "reward_std": 0.03125,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.003125,
+      "step": 110
+    },
+    {
+      "completion_length": 114.996875,
+      "epoch": 0.128,
+      "grad_norm": 0.02286006510257721,
+      "kl": 0.007346725463867188,
+      "learning_rate": 0.0001276595744680851,
+      "loss": 0.0076,
+      "reward": 0.025,
+      "reward_std": 0.05,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.009375,
+      "step": 120
+    },
+    {
+      "completion_length": 119.315625,
+      "epoch": 0.13866666666666666,
+      "grad_norm": 0.015629781410098076,
+      "kl": 0.008090972900390625,
+      "learning_rate": 0.00013829787234042552,
+      "loss": 0.0011,
+      "reward": 0.009375,
+      "reward_std": 0.01875,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.0,
+      "step": 130
+    },
+    {
+      "completion_length": 121.821875,
+      "epoch": 0.14933333333333335,
+      "grad_norm": 0.15498439967632294,
+      "kl": 0.006272506713867187,
+      "learning_rate": 0.00014893617021276593,
+      "loss": -0.0012,
+      "reward": 0.021875,
+      "reward_std": 0.03846687823534012,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.003125,
+      "step": 140
+    },
+    {
+      "completion_length": 121.409375,
+      "epoch": 0.16,
+      "grad_norm": 0.18756870925426483,
+      "kl": 0.00465240478515625,
+      "learning_rate": 0.00015957446808510637,
+      "loss": 0.0012,
+      "reward": 0.021875,
+      "reward_std": 0.03846687823534012,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.003125,
+      "step": 150
+    },
+    {
+      "completion_length": 118.26875,
+      "epoch": 0.17066666666666666,
+      "grad_norm": 0.011626984924077988,
+      "kl": 0.01092681884765625,
+      "learning_rate": 0.00017021276595744682,
+      "loss": -0.0011,
+      "reward": 0.021875,
+      "reward_std": 0.03318375647068024,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.003125,
+      "step": 160
+    },
+    {
+      "completion_length": 119.25,
+      "epoch": 0.18133333333333335,
+      "grad_norm": 0.00764912273734808,
+      "kl": 0.00976104736328125,
+      "learning_rate": 0.0001808510638297872,
+      "loss": 0.0045,
+      "reward": 0.021875,
+      "reward_std": 0.04375,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.0,
+      "step": 170
+    },
+    {
+      "completion_length": 115.35,
+      "epoch": 0.192,
+      "grad_norm": 0.0785018652677536,
+      "kl": 0.014077377319335938,
+      "learning_rate": 0.00019148936170212765,
+      "loss": 0.0037,
+      "reward": 0.025,
+      "reward_std": 0.04471687823534012,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.015625,
+      "step": 180
+    },
+    {
+      "completion_length": 108.6125,
+      "epoch": 0.20266666666666666,
+      "grad_norm": 0.13107918202877045,
+      "kl": 0.039361572265625,
+      "learning_rate": 0.00020212765957446807,
+      "loss": 0.0412,
+      "reward": 0.11875,
+      "reward_std": 0.18907372057437896,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.096875,
+      "step": 190
+    },
+    {
+      "completion_length": 89.915625,
+      "epoch": 0.21333333333333335,
+      "grad_norm": 0.19012346863746643,
+      "kl": 0.08895263671875,
+      "learning_rate": 0.0002127659574468085,
+      "loss": 0.1321,
+      "reward": 0.46875,
+      "reward_std": 0.41404569447040557,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.453125,
+      "step": 200
+    },
+    {
+      "completion_length": 47.74375,
+      "epoch": 0.224,
+      "grad_norm": 0.4668453335762024,
+      "kl": 0.26416015625,
+      "learning_rate": 0.0002234042553191489,
+      "loss": 0.0712,
+      "reward": 0.871875,
+      "reward_std": 0.19805223047733306,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.853125,
+      "step": 210
+    },
+    {
+      "completion_length": 45.15625,
+      "epoch": 0.23466666666666666,
+      "grad_norm": 0.21052278578281403,
+      "kl": 0.3112213134765625,
+      "learning_rate": 0.00023404255319148934,
+      "loss": 0.0464,
+      "reward": 0.890625,
+      "reward_std": 0.11346687823534012,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.878125,
+      "step": 220
+    },
+    {
+      "completion_length": 57.2875,
+      "epoch": 0.24533333333333332,
+      "grad_norm": 0.16618619859218597,
+      "kl": 0.254522705078125,
+      "learning_rate": 0.00024468085106382976,
+      "loss": 0.0589,
+      "reward": 0.834375,
+      "reward_std": 0.12261751294136047,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.821875,
+      "step": 230
+    },
+    {
+      "completion_length": 68.153125,
+      "epoch": 0.256,
+      "grad_norm": 0.17739807069301605,
+      "kl": 0.214471435546875,
+      "learning_rate": 0.0002553191489361702,
+      "loss": 0.1375,
+      "reward": 0.659375,
+      "reward_std": 0.28527562469244006,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.65,
+      "step": 240
+    },
+    {
+      "completion_length": 52.709375,
+      "epoch": 0.26666666666666666,
+      "grad_norm": 0.09843996912240982,
+      "kl": 0.2847900390625,
+      "learning_rate": 0.0002659574468085106,
+      "loss": 0.1085,
+      "reward": 0.834375,
+      "reward_std": 0.290549997985363,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.8,
+      "step": 250
+    },
+    {
+      "completion_length": 51.55625,
+      "epoch": 0.2773333333333333,
+      "grad_norm": 0.1133696436882019,
+      "kl": 0.276953125,
+      "learning_rate": 0.00027659574468085103,
+      "loss": 0.0437,
+      "reward": 0.903125,
+      "reward_std": 0.1361730858683586,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.878125,
+      "step": 260
+    },
+    {
+      "completion_length": 55.046875,
+      "epoch": 0.288,
+      "grad_norm": 0.14536090195178986,
+      "kl": 0.2501953125,
+      "learning_rate": 0.0002872340425531915,
+      "loss": 0.0588,
+      "reward": 0.878125,
+      "reward_std": 0.13846687823534012,
+      "rewards/accuracy_reward": 0.00625,
+      "rewards/format_reward": 0.871875,
+      "step": 270
+    },
+    {
+      "completion_length": 55.853125,
+      "epoch": 0.2986666666666667,
+      "grad_norm": 0.1799221634864807,
+      "kl": 0.3143310546875,
+      "learning_rate": 0.00029787234042553186,
+      "loss": 0.0609,
+      "reward": 0.90625,
+      "reward_std": 0.18080126941204072,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.884375,
+      "step": 280
+    },
+    {
+      "completion_length": 59.85,
+      "epoch": 0.30933333333333335,
+      "grad_norm": 0.10688479989767075,
+      "kl": 0.20706787109375,
+      "learning_rate": 0.0002999925930442553,
+      "loss": 0.0522,
+      "reward": 0.815625,
+      "reward_std": 0.2959165498614311,
+      "rewards/accuracy_reward": 0.065625,
+      "rewards/format_reward": 0.75,
+      "step": 290
+    },
+    {
+      "completion_length": 64.625,
+      "epoch": 0.32,
+      "grad_norm": 0.03851361572742462,
+      "kl": 0.201220703125,
+      "learning_rate": 0.00029996250354024344,
+      "loss": 0.0815,
+      "reward": 0.8625,
+      "reward_std": 0.21301814764738083,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.85,
+      "step": 300
+    },
+    {
+      "completion_length": 57.95,
+      "epoch": 0.33066666666666666,
+      "grad_norm": 0.23480646312236786,
+      "kl": 0.221240234375,
+      "learning_rate": 0.0002999092731927958,
+      "loss": 0.0292,
+      "reward": 0.921875,
+      "reward_std": 0.15895397514104842,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.884375,
+      "step": 310
+    },
+    {
+      "completion_length": 64.196875,
+      "epoch": 0.3413333333333333,
+      "grad_norm": 0.1151675432920456,
+      "kl": 0.20123291015625,
+      "learning_rate": 0.0002998329102159332,
+      "loss": 0.0491,
+      "reward": 0.83125,
+      "reward_std": 0.19258119761943818,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.8125,
+      "step": 320
+    },
+    {
+      "completion_length": 70.628125,
+      "epoch": 0.352,
+      "grad_norm": 0.1377689391374588,
+      "kl": 0.1906005859375,
+      "learning_rate": 0.0002997334263932927,
+      "loss": 0.0841,
+      "reward": 0.846875,
+      "reward_std": 0.21890811175107955,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.83125,
+      "step": 330
+    },
+    {
+      "completion_length": 61.54375,
+      "epoch": 0.3626666666666667,
+      "grad_norm": 0.0947548896074295,
+      "kl": 0.21240234375,
+      "learning_rate": 0.0002996108370763087,
+      "loss": 0.062,
+      "reward": 0.88125,
+      "reward_std": 0.13713996410369872,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.865625,
+      "step": 340
+    },
+    {
+      "completion_length": 60.109375,
+      "epoch": 0.37333333333333335,
+      "grad_norm": 0.14599719643592834,
+      "kl": 0.2236083984375,
+      "learning_rate": 0.0002994651611818448,
+      "loss": 0.0408,
+      "reward": 0.928125,
+      "reward_std": 0.18282372057437896,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.9,
+      "step": 350
+    },
+    {
+      "completion_length": 62.90625,
+      "epoch": 0.384,
+      "grad_norm": 0.3738599121570587,
+      "kl": 0.2464111328125,
+      "learning_rate": 0.00029929642118927394,
+      "loss": 0.0753,
+      "reward": 0.834375,
+      "reward_std": 0.20676814764738083,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.809375,
+      "step": 360
+    },
+    {
+      "completion_length": 70.1625,
+      "epoch": 0.39466666666666667,
+      "grad_norm": 2.8762810230255127,
+      "kl": 0.88681640625,
+      "learning_rate": 0.00029910464313701013,
+      "loss": 0.2053,
+      "reward": 0.640625,
+      "reward_std": 0.38192625939846037,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.63125,
+      "step": 370
+    },
+    {
+      "completion_length": 43.578125,
+      "epoch": 0.4053333333333333,
+      "grad_norm": 0.9355350136756897,
+      "kl": 2.06865234375,
+      "learning_rate": 0.0002988898566184902,
+      "loss": 0.2631,
+      "reward": 0.725,
+      "reward_std": 0.32462068647146225,
+      "rewards/accuracy_reward": 0.0,
+      "rewards/format_reward": 0.725,
+      "step": 380
+    },
+    {
+      "completion_length": 44.19375,
+      "epoch": 0.416,
+      "grad_norm": 0.709173858165741,
+      "kl": 3.47939453125,
+      "learning_rate": 0.0002986520947776074,
+      "loss": 0.3225,
+      "reward": 0.6125,
+      "reward_std": 0.3950331017374992,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.603125,
+      "step": 390
+    },
+    {
+      "completion_length": 54.775,
+      "epoch": 0.4266666666666667,
+      "grad_norm": 0.6549698114395142,
+      "kl": 4.3202392578125,
+      "learning_rate": 0.0002983913943035968,
+      "loss": 0.3808,
+      "reward": 0.66875,
+      "reward_std": 0.3901100158691406,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.65,
+      "step": 400
+    },
+    {
+      "completion_length": 53.871875,
+      "epoch": 0.43733333333333335,
+      "grad_norm": 0.01826515607535839,
+      "kl": 2.477734375,
+      "learning_rate": 0.00029810779542537355,
+      "loss": 0.2661,
+      "reward": 0.79375,
+      "reward_std": 0.22999776750802994,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.76875,
+      "step": 410
+    },
+    {
+      "completion_length": 49.434375,
+      "epoch": 0.448,
+      "grad_norm": 0.5134692192077637,
+      "kl": 2.07587890625,
+      "learning_rate": 0.0002978013419053255,
+      "loss": 0.2091,
+      "reward": 0.771875,
+      "reward_std": 0.26785253882408144,
+      "rewards/accuracy_reward": 0.0,
+      "rewards/format_reward": 0.771875,
+      "step": 420
+    },
+    {
+      "completion_length": 59.475,
+      "epoch": 0.45866666666666667,
+      "grad_norm": 0.7835673689842224,
+      "kl": 2.516943359375,
+      "learning_rate": 0.00029747208103256,
+      "loss": 0.2312,
+      "reward": 0.740625,
+      "reward_std": 0.31220938116312025,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.728125,
+      "step": 430
+    },
+    {
+      "completion_length": 58.815625,
+      "epoch": 0.4693333333333333,
+      "grad_norm": 0.021143430843949318,
+      "kl": 2.1959716796875,
+      "learning_rate": 0.0002971200636156068,
+      "loss": 0.2386,
+      "reward": 0.796875,
+      "reward_std": 0.2231356605887413,
+      "rewards/accuracy_reward": 0.0,
+      "rewards/format_reward": 0.796875,
+      "step": 440
+    },
+    {
+      "completion_length": 55.696875,
+      "epoch": 0.48,
+      "grad_norm": 1.8231980800628662,
+      "kl": 2.9314697265625,
+      "learning_rate": 0.00029674534397457745,
+      "loss": 0.3506,
+      "reward": 0.796875,
+      "reward_std": 0.2616912335157394,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.775,
+      "step": 450
+    },
+    {
+      "completion_length": 49.778125,
+      "epoch": 0.49066666666666664,
+      "grad_norm": 0.5252532362937927,
+      "kl": 1.47607421875,
+      "learning_rate": 0.00029634797993278333,
+      "loss": 0.2026,
+      "reward": 0.89375,
+      "reward_std": 0.11293471753597259,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.88125,
+      "step": 460
+    },
+    {
+      "completion_length": 57.903125,
+      "epoch": 0.5013333333333333,
+      "grad_norm": 0.10718824714422226,
+      "kl": 1.683837890625,
+      "learning_rate": 0.000295928032807813,
+      "loss": 0.1887,
+      "reward": 0.859375,
+      "reward_std": 0.1423343911767006,
+      "rewards/accuracy_reward": 0.00625,
+      "rewards/format_reward": 0.853125,
+      "step": 470
+    },
+    {
+      "completion_length": 63.734375,
+      "epoch": 0.512,
+      "grad_norm": 0.32101932168006897,
+      "kl": 2.9671875,
+      "learning_rate": 0.00029548556740206994,
+      "loss": 0.3254,
+      "reward": 0.79375,
+      "reward_std": 0.29874250292778015,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.784375,
+      "step": 480
+    },
+    {
+      "completion_length": 66.253125,
+      "epoch": 0.5226666666666666,
+      "grad_norm": 0.7132259011268616,
+      "kl": 2.6101806640625,
+      "learning_rate": 0.0002950206519927731,
+      "loss": 0.2574,
+      "reward": 0.728125,
+      "reward_std": 0.3086773693561554,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.709375,
+      "step": 490
+    },
+    {
+      "completion_length": 64.765625,
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.7293491959571838,
+      "kl": 3.3251220703125,
+      "learning_rate": 0.00029453335832142075,
+      "loss": 0.3315,
+      "reward": 0.75625,
+      "reward_std": 0.27030970752239225,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.746875,
+      "step": 500
+    },
+    {
+      "completion_length": 61.140625,
+      "epoch": 0.544,
+      "grad_norm": 0.4900813400745392,
+      "kl": 1.6069091796875,
+      "learning_rate": 0.0002940237615827202,
+      "loss": 0.162,
+      "reward": 0.86875,
+      "reward_std": 0.21899680644273758,
+      "rewards/accuracy_reward": 0.053125,
+      "rewards/format_reward": 0.815625,
+      "step": 510
+    },
+    {
+      "completion_length": 59.621875,
+      "epoch": 0.5546666666666666,
+      "grad_norm": 0.4984245002269745,
+      "kl": 1.695751953125,
+      "learning_rate": 0.00029349194041298435,
+      "loss": 0.2075,
+      "reward": 0.903125,
+      "reward_std": 0.16081304997205734,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.86875,
+      "step": 520
+    },
+    {
+      "completion_length": 62.209375,
+      "epoch": 0.5653333333333334,
+      "grad_norm": 0.25215986371040344,
+      "kl": 1.5575439453125,
+      "learning_rate": 0.0002929379768779971,
+      "loss": 0.1648,
+      "reward": 0.890625,
+      "reward_std": 0.17983439117670058,
+      "rewards/accuracy_reward": 0.040625,
+      "rewards/format_reward": 0.85,
+      "step": 530
+    },
+    {
+      "completion_length": 65.865625,
+      "epoch": 0.576,
+      "grad_norm": 0.1489488184452057,
+      "kl": 2.0063720703125,
+      "learning_rate": 0.0002923619564603501,
+      "loss": 0.187,
+      "reward": 0.78125,
+      "reward_std": 0.2043856605887413,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.75,
+      "step": 540
+    },
+    {
+      "completion_length": 68.68125,
+      "epoch": 0.5866666666666667,
+      "grad_norm": 0.24991311132907867,
+      "kl": 1.0421142578125,
+      "learning_rate": 0.00029176396804625135,
+      "loss": 0.0977,
+      "reward": 0.909375,
+      "reward_std": 0.1441847175359726,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.8875,
+      "step": 550
+    },
+    {
+      "completion_length": 64.2,
+      "epoch": 0.5973333333333334,
+      "grad_norm": 0.7193971872329712,
+      "kl": 2.2302490234375,
+      "learning_rate": 0.00029114410391180946,
+      "loss": 0.2166,
+      "reward": 0.834375,
+      "reward_std": 0.2048343911767006,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.81875,
+      "step": 560
+    },
+    {
+      "completion_length": 66.01875,
+      "epoch": 0.608,
+      "grad_norm": 0.59996098279953,
+      "kl": 2.652294921875,
+      "learning_rate": 0.0002905024597087945,
+      "loss": 0.2907,
+      "reward": 0.815625,
+      "reward_std": 0.20596464574337006,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.7875,
+      "step": 570
+    },
+    {
+      "completion_length": 69.009375,
+      "epoch": 0.6186666666666667,
+      "grad_norm": 0.32363754510879517,
+      "kl": 0.75888671875,
+      "learning_rate": 0.0002898391344498775,
+      "loss": 0.112,
+      "reward": 0.896875,
+      "reward_std": 0.14761751294136047,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.8625,
+      "step": 580
+    },
+    {
+      "completion_length": 58.490625,
+      "epoch": 0.6293333333333333,
+      "grad_norm": 0.5817243456840515,
+      "kl": 3.2273193359375,
+      "learning_rate": 0.0002891542304933521,
+      "loss": 0.3775,
+      "reward": 0.796875,
+      "reward_std": 0.24620190411806106,
+      "rewards/accuracy_reward": 0.00625,
+      "rewards/format_reward": 0.790625,
+      "step": 590
+    },
+    {
+      "completion_length": 56.890625,
+      "epoch": 0.64,
+      "grad_norm": 0.561817467212677,
+      "kl": 1.345703125,
+      "learning_rate": 0.00028844785352733924,
+      "loss": 0.1409,
+      "reward": 0.884375,
+      "reward_std": 0.1315855011343956,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.8625,
+      "step": 600
+    },
+    {
+      "completion_length": 54.61875,
+      "epoch": 0.6506666666666666,
+      "grad_norm": 0.41451311111450195,
+      "kl": 1.3966552734375,
+      "learning_rate": 0.00028772011255347873,
+      "loss": 0.1476,
+      "reward": 0.890625,
+      "reward_std": 0.16838996410369872,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.871875,
+      "step": 610
+    },
+    {
+      "completion_length": 58.15,
+      "epoch": 0.6613333333333333,
+      "grad_norm": 0.38927924633026123,
+      "kl": 2.2388427734375,
+      "learning_rate": 0.00028697111987010865,
+      "loss": 0.2576,
+      "reward": 0.871875,
+      "reward_std": 0.1995512694120407,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.84375,
+      "step": 620
+    },
+    {
+      "completion_length": 57.20625,
+      "epoch": 0.672,
+      "grad_norm": 0.09751415997743607,
+      "kl": 0.793994140625,
+      "learning_rate": 0.0002862009910549369,
+      "loss": 0.0629,
+      "reward": 0.9375,
+      "reward_std": 0.10386751294136047,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.91875,
+      "step": 630
+    },
+    {
+      "completion_length": 65.846875,
+      "epoch": 0.6826666666666666,
+      "grad_norm": 0.2675510048866272,
+      "kl": 2.466015625,
+      "learning_rate": 0.0002854098449472061,
+      "loss": 0.2627,
+      "reward": 0.79375,
+      "reward_std": 0.2520918682217598,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.778125,
+      "step": 640
+    },
+    {
+      "completion_length": 62.98125,
+      "epoch": 0.6933333333333334,
+      "grad_norm": 0.15855202078819275,
+      "kl": 1.8398193359375,
+      "learning_rate": 0.00028459780362935527,
+      "loss": 0.177,
+      "reward": 0.91875,
+      "reward_std": 0.15879059880971907,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.90625,
+      "step": 650
+    },
+    {
+      "completion_length": 62.6,
+      "epoch": 0.704,
+      "grad_norm": 0.12087615579366684,
+      "kl": 2.0813720703125,
+      "learning_rate": 0.0002837649924081816,
+      "loss": 0.1866,
+      "reward": 0.90625,
+      "reward_std": 0.19479155987501146,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.875,
+      "step": 660
+    },
+    {
+      "completion_length": 65.925,
+      "epoch": 0.7146666666666667,
+      "grad_norm": 0.39411771297454834,
+      "kl": 1.392919921875,
+      "learning_rate": 0.00028291153979550387,
+      "loss": 0.2015,
+      "reward": 0.915625,
+      "reward_std": 0.1775405988097191,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.8875,
+      "step": 670
+    },
+    {
+      "completion_length": 64.378125,
+      "epoch": 0.7253333333333334,
+      "grad_norm": 1.1659783124923706,
+      "kl": 2.8165283203125,
+      "learning_rate": 0.00028203757748833174,
+      "loss": 0.3109,
+      "reward": 0.778125,
+      "reward_std": 0.21169123351573943,
+      "rewards/accuracy_reward": 0.003125,
+      "rewards/format_reward": 0.775,
+      "step": 680
+    },
+    {
+      "completion_length": 57.334375,
+      "epoch": 0.736,
+      "grad_norm": 0.27627384662628174,
+      "kl": 1.0085693359375,
+      "learning_rate": 0.0002811432403485437,
+      "loss": 0.1226,
+      "reward": 0.859375,
+      "reward_std": 0.11540063470602036,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.85,
+      "step": 690
+    },
+    {
+      "completion_length": 54.6,
+      "epoch": 0.7466666666666667,
+      "grad_norm": 0.4506663382053375,
+      "kl": 2.3274169921875,
+      "learning_rate": 0.00028022866638207624,
+      "loss": 0.2726,
+      "reward": 0.853125,
+      "reward_std": 0.2143363133072853,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.828125,
+      "step": 700
+    },
+    {
+      "completion_length": 64.646875,
+      "epoch": 0.7573333333333333,
+      "grad_norm": 0.24161870777606964,
+      "kl": 1.10751953125,
+      "learning_rate": 0.00027929399671762793,
+      "loss": 0.1497,
+      "reward": 0.878125,
+      "reward_std": 0.18096464574337007,
+      "rewards/accuracy_reward": 0.04375,
+      "rewards/format_reward": 0.834375,
+      "step": 710
+    },
+    {
+      "completion_length": 65.128125,
+      "epoch": 0.768,
+      "grad_norm": 0.22652657330036163,
+      "kl": 1.7567138671875,
+      "learning_rate": 0.00027833937558488183,
+      "loss": 0.1692,
+      "reward": 0.865625,
+      "reward_std": 0.19575843811035157,
+      "rewards/accuracy_reward": 0.059375,
+      "rewards/format_reward": 0.80625,
+      "step": 720
+    },
+    {
+      "completion_length": 76.259375,
+      "epoch": 0.7786666666666666,
+      "grad_norm": 0.46417316794395447,
+      "kl": 3.4798828125,
+      "learning_rate": 0.0002773649502922495,
+      "loss": 0.3618,
+      "reward": 0.7125,
+      "reward_std": 0.31879488229751585,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.7,
+      "step": 730
+    },
+    {
+      "completion_length": 64.928125,
+      "epoch": 0.7893333333333333,
+      "grad_norm": 0.8743041753768921,
+      "kl": 2.166650390625,
+      "learning_rate": 0.00027637087120413933,
+      "loss": 0.2562,
+      "reward": 0.840625,
+      "reward_std": 0.2851921945810318,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.803125,
+      "step": 740
+    },
+    {
+      "completion_length": 58.028125,
+      "epoch": 0.8,
+      "grad_norm": 0.18655003607273102,
+      "kl": 1.73994140625,
+      "learning_rate": 0.000275357291717754,
+      "loss": 0.191,
+      "reward": 0.909375,
+      "reward_std": 0.19460364878177644,
+      "rewards/accuracy_reward": 0.053125,
+      "rewards/format_reward": 0.85625,
+      "step": 750
+    },
+    {
+      "completion_length": 60.803125,
+      "epoch": 0.8106666666666666,
+      "grad_norm": 0.04459076747298241,
+      "kl": 1.7782470703125,
+      "learning_rate": 0.0002743243682394195,
+      "loss": 0.2117,
+      "reward": 0.83125,
+      "reward_std": 0.16213996410369874,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.80625,
+      "step": 760
+    },
+    {
+      "completion_length": 56.203125,
+      "epoch": 0.8213333333333334,
+      "grad_norm": 0.04220689460635185,
+      "kl": 1.7406494140625,
+      "learning_rate": 0.00027327226016044963,
+      "loss": 0.1999,
+      "reward": 0.878125,
+      "reward_std": 0.1423343911767006,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.84375,
+      "step": 770
+    },
+    {
+      "completion_length": 58.375,
+      "epoch": 0.832,
+      "grad_norm": 0.3807085156440735,
+      "kl": 1.8222412109375,
+      "learning_rate": 0.00027220112983255087,
+      "loss": 0.2296,
+      "reward": 0.903125,
+      "reward_std": 0.20482564270496367,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.86875,
+      "step": 780
+    },
+    {
+      "completion_length": 63.378125,
+      "epoch": 0.8426666666666667,
+      "grad_norm": 0.01206011138856411,
+      "kl": 2.458740234375,
+      "learning_rate": 0.00027111114254276913,
+      "loss": 0.3096,
+      "reward": 0.84375,
+      "reward_std": 0.2114198923110962,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.821875,
+      "step": 790
+    },
+    {
+      "completion_length": 59.7625,
+      "epoch": 0.8533333333333334,
+      "grad_norm": 0.40591439604759216,
+      "kl": 1.378076171875,
+      "learning_rate": 0.00027000246648798456,
+      "loss": 0.1403,
+      "reward": 0.934375,
+      "reward_std": 0.14083535224199295,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.903125,
+      "step": 800
+    },
+    {
+      "completion_length": 62.284375,
+      "epoch": 0.864,
+      "grad_norm": 0.27511999011039734,
+      "kl": 2.2107177734375,
+      "learning_rate": 0.0002688752727489565,
+      "loss": 0.2636,
+      "reward": 0.8875,
+      "reward_std": 0.21739855110645295,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.859375,
+      "step": 810
+    },
+    {
+      "completion_length": 65.265625,
+      "epoch": 0.8746666666666667,
+      "grad_norm": 0.2582601010799408,
+      "kl": 2.3897705078125,
+      "learning_rate": 0.00026772973526392453,
+      "loss": 0.2965,
+      "reward": 0.83125,
+      "reward_std": 0.2494538262486458,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.803125,
+      "step": 820
+    },
+    {
+      "completion_length": 54.865625,
+      "epoch": 0.8853333333333333,
+      "grad_norm": 0.23494267463684082,
+      "kl": 2.6015625,
+      "learning_rate": 0.0002665660308017671,
+      "loss": 0.252,
+      "reward": 0.9,
+      "reward_std": 0.23950843811035155,
+      "rewards/accuracy_reward": 0.04375,
+      "rewards/format_reward": 0.85625,
+      "step": 830
+    },
+    {
+      "completion_length": 55.503125,
+      "epoch": 0.896,
+      "grad_norm": 0.20798054337501526,
+      "kl": 1.5889892578125,
+      "learning_rate": 0.000265384338934725,
+      "loss": 0.1996,
+      "reward": 0.9375,
+      "reward_std": 0.20120493620634078,
+      "rewards/accuracy_reward": 0.0625,
+      "rewards/format_reward": 0.875,
+      "step": 840
+    },
+    {
+      "completion_length": 59.909375,
+      "epoch": 0.9066666666666666,
+      "grad_norm": 0.23807695508003235,
+      "kl": 1.656982421875,
+      "learning_rate": 0.00026418484201069055,
+      "loss": 0.194,
+      "reward": 0.840625,
+      "reward_std": 0.17524680644273757,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.80625,
+      "step": 850
+    },
+    {
+      "completion_length": 56.74375,
+      "epoch": 0.9173333333333333,
+      "grad_norm": 0.21559438109397888,
+      "kl": 0.813427734375,
+      "learning_rate": 0.00026296772512507025,
+      "loss": 0.1054,
+      "reward": 0.884375,
+      "reward_std": 0.13916241526603698,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.8625,
+      "step": 860
+    },
+    {
+      "completion_length": 62.390625,
+      "epoch": 0.928,
+      "grad_norm": 0.1291944831609726,
+      "kl": 1.9663330078125,
+      "learning_rate": 0.0002617331760922218,
+      "loss": 0.2316,
+      "reward": 0.85625,
+      "reward_std": 0.15685684233903885,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.84375,
+      "step": 870
+    },
+    {
+      "completion_length": 56.009375,
+      "epoch": 0.9386666666666666,
+      "grad_norm": 1.045857548713684,
+      "kl": 1.652001953125,
+      "learning_rate": 0.0002604813854164726,
+      "loss": 0.1616,
+      "reward": 0.9375,
+      "reward_std": 0.16336943507194518,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.909375,
+      "step": 880
+    },
+    {
+      "completion_length": 64.446875,
+      "epoch": 0.9493333333333334,
+      "grad_norm": 0.33091413974761963,
+      "kl": 3.235400390625,
+      "learning_rate": 0.0002592125462627231,
+      "loss": 0.3973,
+      "reward": 0.796875,
+      "reward_std": 0.2716366216540337,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.78125,
+      "step": 890
+    },
+    {
+      "completion_length": 59.0,
+      "epoch": 0.96,
+      "grad_norm": 0.25974419713020325,
+      "kl": 1.752197265625,
+      "learning_rate": 0.00025792685442663877,
+      "loss": 0.1938,
+      "reward": 0.89375,
+      "reward_std": 0.1826515957713127,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.865625,
+      "step": 900
+    },
+    {
+      "completion_length": 57.4125,
+      "epoch": 0.9706666666666667,
+      "grad_norm": 0.2569887936115265,
+      "kl": 2.5720703125,
+      "learning_rate": 0.00025662450830443733,
+      "loss": 0.3213,
+      "reward": 0.846875,
+      "reward_std": 0.22065922170877456,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.815625,
+      "step": 910
+    },
+    {
+      "completion_length": 51.140625,
+      "epoch": 0.9813333333333333,
+      "grad_norm": 0.18798935413360596,
+      "kl": 1.6124267578125,
+      "learning_rate": 0.0002553057088622736,
+      "loss": 0.2214,
+      "reward": 0.925,
+      "reward_std": 0.1477062076330185,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.909375,
+      "step": 920
+    },
+    {
+      "completion_length": 53.921875,
+      "epoch": 0.992,
+      "grad_norm": 0.8309330940246582,
+      "kl": 1.651806640625,
+      "learning_rate": 0.0002539706596052286,
+      "loss": 0.1893,
+      "reward": 0.909375,
+      "reward_std": 0.16504059880971908,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.878125,
+      "step": 930
+    },
+    {
+      "completion_length": 59.96052631578947,
+      "epoch": 1.0021333333333333,
+      "grad_norm": 0.3670661151409149,
+      "kl": 3.8713250411184212,
+      "learning_rate": 0.000252619566545906,
+      "loss": 0.4192,
+      "reward": 0.7796052631578947,
+      "reward_std": 0.2917690135930714,
+      "rewards/accuracy_reward": 0.01644736842105263,
+      "rewards/format_reward": 0.7631578947368421,
+      "step": 940
+    },
+    {
+      "completion_length": 59.1125,
+      "epoch": 1.0128,
+      "grad_norm": 0.2787770926952362,
+      "kl": 2.920068359375,
+      "learning_rate": 0.0002512526381726427,
+      "loss": 0.4194,
+      "reward": 0.734375,
+      "reward_std": 0.3439827933907509,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.715625,
+      "step": 950
+    },
+    {
+      "completion_length": 57.63125,
+      "epoch": 1.0234666666666667,
+      "grad_norm": 0.15397749841213226,
+      "kl": 2.7442626953125,
+      "learning_rate": 0.00024987008541733663,
+      "loss": 0.3308,
+      "reward": 0.81875,
+      "reward_std": 0.24064744114875794,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.80625,
+      "step": 960
+    },
+    {
+      "completion_length": 51.134375,
+      "epoch": 1.0341333333333333,
+      "grad_norm": 0.41957736015319824,
+      "kl": 1.035986328125,
+      "learning_rate": 0.0002484721216228974,
+      "loss": 0.1489,
+      "reward": 0.9625,
+      "reward_std": 0.10561862289905548,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.940625,
+      "step": 970
+    },
+    {
+      "completion_length": 56.68125,
+      "epoch": 1.0448,
+      "grad_norm": 0.11578945815563202,
+      "kl": 1.0271240234375,
+      "learning_rate": 0.0002470589625103255,
+      "loss": 0.1162,
+      "reward": 0.9,
+      "reward_std": 0.13415063470602034,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.865625,
+      "step": 980
+    },
+    {
+      "completion_length": 55.071875,
+      "epoch": 1.0554666666666668,
+      "grad_norm": 0.006299301981925964,
+      "kl": 1.3626953125,
+      "learning_rate": 0.0002456308261454241,
+      "loss": 0.1452,
+      "reward": 0.909375,
+      "reward_std": 0.1264015957713127,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.8875,
+      "step": 990
+    },
+    {
+      "completion_length": 58.0875,
+      "epoch": 1.0661333333333334,
+      "grad_norm": 0.007178621832281351,
+      "kl": 1.4033203125,
+      "learning_rate": 0.00024418793290514906,
+      "loss": 0.1534,
+      "reward": 0.86875,
+      "reward_std": 0.13291241526603698,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.85,
+      "step": 1000
+    },
+    {
+      "completion_length": 58.828125,
+      "epoch": 1.0768,
+      "grad_norm": 0.1767469048500061,
+      "kl": 1.26591796875,
+      "learning_rate": 0.0002427305054436024,
+      "loss": 0.1309,
+      "reward": 0.915625,
+      "reward_std": 0.15447435528039932,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.88125,
+      "step": 1010
+    },
+    {
+      "completion_length": 60.309375,
+      "epoch": 1.0874666666666666,
+      "grad_norm": 0.20854564011096954,
+      "kl": 2.313330078125,
+      "learning_rate": 0.00024125876865767438,
+      "loss": 0.2191,
+      "reward": 0.90625,
+      "reward_std": 0.17595286518335343,
+      "rewards/accuracy_reward": 0.053125,
+      "rewards/format_reward": 0.853125,
+      "step": 1020
+    },
+    {
+      "completion_length": 60.540625,
+      "epoch": 1.0981333333333334,
+      "grad_norm": 0.13962095975875854,
+      "kl": 2.0788818359375,
+      "learning_rate": 0.0002397729496523396,
+      "loss": 0.226,
+      "reward": 0.890625,
+      "reward_std": 0.18282372057437896,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.865625,
+      "step": 1030
+    },
+    {
+      "completion_length": 58.89375,
+      "epoch": 1.1088,
+      "grad_norm": 0.25990164279937744,
+      "kl": 1.064794921875,
+      "learning_rate": 0.0002382732777056119,
+      "loss": 0.1602,
+      "reward": 0.915625,
+      "reward_std": 0.16433631330728532,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.890625,
+      "step": 1040
+    },
+    {
+      "completion_length": 58.478125,
+      "epoch": 1.1194666666666666,
+      "grad_norm": 0.21004174649715424,
+      "kl": 1.47158203125,
+      "learning_rate": 0.00023675998423316457,
+      "loss": 0.1682,
+      "reward": 0.953125,
+      "reward_std": 0.18810684233903885,
+      "rewards/accuracy_reward": 0.05,
+      "rewards/format_reward": 0.903125,
+      "step": 1050
+    },
+    {
+      "completion_length": 59.39375,
+      "epoch": 1.1301333333333332,
+      "grad_norm": 0.08877279609441757,
+      "kl": 1.388037109375,
+      "learning_rate": 0.00023523330275262037,
+      "loss": 0.1636,
+      "reward": 0.9125,
+      "reward_std": 0.1637136846780777,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.875,
+      "step": 1060
+    },
+    {
+      "completion_length": 63.709375,
+      "epoch": 1.1408,
+      "grad_norm": 0.218344584107399,
+      "kl": 1.704052734375,
+      "learning_rate": 0.00023369346884751706,
+      "loss": 0.2163,
+      "reward": 0.884375,
+      "reward_std": 0.23439744114875793,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.8375,
+      "step": 1070
+    },
+    {
+      "completion_length": 60.93125,
+      "epoch": 1.1514666666666666,
+      "grad_norm": 0.24491117894649506,
+      "kl": 1.15849609375,
+      "learning_rate": 0.00023214072013095434,
+      "loss": 0.1445,
+      "reward": 0.925,
+      "reward_std": 0.14206304997205735,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.890625,
+      "step": 1080
+    },
+    {
+      "completion_length": 55.40625,
+      "epoch": 1.1621333333333332,
+      "grad_norm": 1.4203561544418335,
+      "kl": 1.4958740234375,
+      "learning_rate": 0.00023057529620892773,
+      "loss": 0.2111,
+      "reward": 0.946875,
+      "reward_std": 0.18555223047733307,
+      "rewards/accuracy_reward": 0.040625,
+      "rewards/format_reward": 0.90625,
+      "step": 1090
+    },
+    {
+      "completion_length": 61.6375,
+      "epoch": 1.1728,
+      "grad_norm": 0.07859649509191513,
+      "kl": 2.87578125,
+      "learning_rate": 0.00022899743864335462,
+      "loss": 0.3232,
+      "reward": 0.825,
+      "reward_std": 0.22410253882408143,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.8125,
+      "step": 1100
+    },
+    {
+      "completion_length": 56.74375,
+      "epoch": 1.1834666666666667,
+      "grad_norm": 0.6798639893531799,
+      "kl": 2.165380859375,
+      "learning_rate": 0.0002274073909147986,
+      "loss": 0.29,
+      "reward": 0.86875,
+      "reward_std": 0.17693375647068024,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.85625,
+      "step": 1110
+    },
+    {
+      "completion_length": 56.08125,
+      "epoch": 1.1941333333333333,
+      "grad_norm": 0.46122825145721436,
+      "kl": 2.6029052734375,
+      "learning_rate": 0.000225805398384898,
+      "loss": 0.2877,
+      "reward": 0.85625,
+      "reward_std": 0.15879059880971907,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.8375,
+      "step": 1120
+    },
+    {
+      "completion_length": 53.4875,
+      "epoch": 1.2048,
+      "grad_norm": 0.0872046947479248,
+      "kl": 1.120751953125,
+      "learning_rate": 0.0002241917082585036,
+      "loss": 0.1583,
+      "reward": 0.959375,
+      "reward_std": 0.16838996410369872,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.9125,
+      "step": 1130
+    },
+    {
+      "completion_length": 52.09375,
+      "epoch": 1.2154666666666667,
+      "grad_norm": 0.2135591208934784,
+      "kl": 1.0230712890625,
+      "learning_rate": 0.00022256656954553245,
+      "loss": 0.1191,
+      "reward": 0.9625,
+      "reward_std": 0.14858439117670058,
+      "rewards/accuracy_reward": 0.05625,
+      "rewards/format_reward": 0.90625,
+      "step": 1140
+    },
+    {
+      "completion_length": 50.771875,
+      "epoch": 1.2261333333333333,
+      "grad_norm": 0.2628862261772156,
+      "kl": 2.2327880859375,
+      "learning_rate": 0.00022093023302254295,
+      "loss": 0.2802,
+      "reward": 0.953125,
+      "reward_std": 0.19831304997205734,
+      "rewards/accuracy_reward": 0.053125,
+      "rewards/format_reward": 0.9,
+      "step": 1150
+    },
+    {
+      "completion_length": 55.653125,
+      "epoch": 1.2368000000000001,
+      "grad_norm": 0.01043323241174221,
+      "kl": 1.43515625,
+      "learning_rate": 0.0002192829511940371,
+      "loss": 0.216,
+      "reward": 0.871875,
+      "reward_std": 0.17604155987501144,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.840625,
+      "step": 1160
+    },
+    {
+      "completion_length": 56.01875,
+      "epoch": 1.2474666666666667,
+      "grad_norm": 0.30781543254852295,
+      "kl": 1.352490234375,
+      "learning_rate": 0.00021762497825349663,
+      "loss": 0.1604,
+      "reward": 0.875,
+      "reward_std": 0.18801814764738084,
+      "rewards/accuracy_reward": 0.040625,
+      "rewards/format_reward": 0.834375,
+      "step": 1170
+    },
+    {
+      "completion_length": 57.009375,
+      "epoch": 1.2581333333333333,
+      "grad_norm": 0.268877774477005,
+      "kl": 1.6961669921875,
+      "learning_rate": 0.00021595657004415777,
+      "loss": 0.2207,
+      "reward": 0.896875,
+      "reward_std": 0.18351925760507584,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.865625,
+      "step": 1180
+    },
+    {
+      "completion_length": 53.909375,
+      "epoch": 1.2688,
+      "grad_norm": 0.10004394501447678,
+      "kl": 1.055419921875,
+      "learning_rate": 0.00021427798401953233,
+      "loss": 0.1206,
+      "reward": 0.915625,
+      "reward_std": 0.10359617173671723,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.878125,
+      "step": 1190
+    },
+    {
+      "completion_length": 55.93125,
+      "epoch": 1.2794666666666665,
+      "grad_norm": 0.16535454988479614,
+      "kl": 2.3431884765625,
+      "learning_rate": 0.0002125894792036794,
+      "loss": 0.3288,
+      "reward": 0.903125,
+      "reward_std": 0.22568152397871016,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.865625,
+      "step": 1200
+    },
+    {
+      "completion_length": 55.09375,
+      "epoch": 1.2901333333333334,
+      "grad_norm": 0.19274021685123444,
+      "kl": 1.0988037109375,
+      "learning_rate": 0.0002108913161512354,
+      "loss": 0.1432,
+      "reward": 0.9375,
+      "reward_std": 0.12358439117670059,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.903125,
+      "step": 1210
+    },
+    {
+      "completion_length": 55.675,
+      "epoch": 1.3008,
+      "grad_norm": 0.15594810247421265,
+      "kl": 1.42451171875,
+      "learning_rate": 0.0002091837569072076,
+      "loss": 0.1693,
+      "reward": 0.94375,
+      "reward_std": 0.16636751294136048,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.896875,
+      "step": 1220
+    },
+    {
+      "completion_length": 56.88125,
+      "epoch": 1.3114666666666666,
+      "grad_norm": 0.3196319341659546,
+      "kl": 1.6291015625,
+      "learning_rate": 0.00020746706496653765,
+      "loss": 0.2144,
+      "reward": 0.915625,
+      "reward_std": 0.1927691087126732,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.86875,
+      "step": 1230
+    },
+    {
+      "completion_length": 58.7875,
+      "epoch": 1.3221333333333334,
+      "grad_norm": 0.13602705299854279,
+      "kl": 1.5197509765625,
+      "learning_rate": 0.00020574150523344152,
+      "loss": 0.1651,
+      "reward": 0.94375,
+      "reward_std": 0.16670301407575608,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.90625,
+      "step": 1240
+    },
+    {
+      "completion_length": 62.996875,
+      "epoch": 1.3328,
+      "grad_norm": 0.05853046849370003,
+      "kl": 1.815576171875,
+      "learning_rate": 0.00020400734398053186,
+      "loss": 0.1795,
+      "reward": 0.86875,
+      "reward_std": 0.19752006977796555,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.83125,
+      "step": 1250
+    },
+    {
+      "completion_length": 62.54375,
+      "epoch": 1.3434666666666666,
+      "grad_norm": 0.007495929021388292,
+      "kl": 1.3890869140625,
+      "learning_rate": 0.0002022648488077294,
+      "loss": 0.1695,
+      "reward": 0.884375,
+      "reward_std": 0.1775405988097191,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.85625,
+      "step": 1260
+    },
+    {
+      "completion_length": 61.278125,
+      "epoch": 1.3541333333333334,
+      "grad_norm": 0.1842016726732254,
+      "kl": 1.7712646484375,
+      "learning_rate": 0.0002005142886009691,
+      "loss": 0.2379,
+      "reward": 0.875,
+      "reward_std": 0.16706304997205734,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.853125,
+      "step": 1270
+    },
+    {
+      "completion_length": 62.025,
+      "epoch": 1.3648,
+      "grad_norm": 0.16039888560771942,
+      "kl": 1.9799560546875,
+      "learning_rate": 0.00019875593349070832,
+      "loss": 0.2323,
+      "reward": 0.91875,
+      "reward_std": 0.20685684233903884,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.871875,
+      "step": 1280
+    },
+    {
+      "completion_length": 61.0375,
+      "epoch": 1.3754666666666666,
+      "grad_norm": 0.15333615243434906,
+      "kl": 2.3615478515625,
+      "learning_rate": 0.0001969900548102427,
+      "loss": 0.2778,
+      "reward": 0.84375,
+      "reward_std": 0.20719234347343446,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.825,
+      "step": 1290
+    },
+    {
+      "completion_length": 58.328125,
+      "epoch": 1.3861333333333334,
+      "grad_norm": 0.07369455695152283,
+      "kl": 2.516748046875,
+      "learning_rate": 0.00019521692505383657,
+      "loss": 0.3136,
+      "reward": 0.85,
+      "reward_std": 0.19249776750802994,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.825,
+      "step": 1300
+    },
+    {
+      "completion_length": 55.35625,
+      "epoch": 1.3968,
+      "grad_norm": 0.2832612693309784,
+      "kl": 1.73642578125,
+      "learning_rate": 0.000193436817834674,
+      "loss": 0.2319,
+      "reward": 0.925,
+      "reward_std": 0.21794123351573944,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.890625,
+      "step": 1310
+    },
+    {
+      "completion_length": 59.0875,
+      "epoch": 1.4074666666666666,
+      "grad_norm": 0.24120619893074036,
+      "kl": 2.8723876953125,
+      "learning_rate": 0.0001916500078426373,
+      "loss": 0.3392,
+      "reward": 0.8375,
+      "reward_std": 0.22023502588272095,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.8125,
+      "step": 1320
+    },
+    {
+      "completion_length": 55.025,
+      "epoch": 1.4181333333333335,
+      "grad_norm": 0.18106360733509064,
+      "kl": 1.674365234375,
+      "learning_rate": 0.0001898567708019196,
+      "loss": 0.2313,
+      "reward": 0.88125,
+      "reward_std": 0.17693375647068024,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.8625,
+      "step": 1330
+    },
+    {
+      "completion_length": 55.15625,
+      "epoch": 1.4288,
+      "grad_norm": 0.1365566849708557,
+      "kl": 2.2543212890625,
+      "learning_rate": 0.00018805738342847727,
+      "loss": 0.3175,
+      "reward": 0.878125,
+      "reward_std": 0.2048343911767006,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.85,
+      "step": 1340
+    },
+    {
+      "completion_length": 55.5625,
+      "epoch": 1.4394666666666667,
+      "grad_norm": 0.10590548813343048,
+      "kl": 1.425244140625,
+      "learning_rate": 0.00018625212338733,
+      "loss": 0.1371,
+      "reward": 0.8875,
+      "reward_std": 0.10915063470602035,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.859375,
+      "step": 1350
+    },
+    {
+      "completion_length": 53.225,
+      "epoch": 1.4501333333333333,
+      "grad_norm": 0.15103192627429962,
+      "kl": 1.1132080078125,
+      "learning_rate": 0.00018444126924971387,
+      "loss": 0.1228,
+      "reward": 0.953125,
+      "reward_std": 0.11838996410369873,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.915625,
+      "step": 1360
+    },
+    {
+      "completion_length": 57.265625,
+      "epoch": 1.4607999999999999,
+      "grad_norm": 0.05684982240200043,
+      "kl": 1.13671875,
+      "learning_rate": 0.0001826251004500947,
+      "loss": 0.1437,
+      "reward": 0.9125,
+      "reward_std": 0.19223694801330565,
+      "rewards/accuracy_reward": 0.065625,
+      "rewards/format_reward": 0.846875,
+      "step": 1370
+    },
+    {
+      "completion_length": 59.325,
+      "epoch": 1.4714666666666667,
+      "grad_norm": 0.17307031154632568,
+      "kl": 1.8568115234375,
+      "learning_rate": 0.0001808038972430486,
+      "loss": 0.2279,
+      "reward": 0.871875,
+      "reward_std": 0.16398502588272096,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.853125,
+      "step": 1380
+    },
+    {
+      "completion_length": 59.646875,
+      "epoch": 1.4821333333333333,
+      "grad_norm": 0.007796150632202625,
+      "kl": 2.4506591796875,
+      "learning_rate": 0.00017897794066001524,
+      "loss": 0.2992,
+      "reward": 0.84375,
+      "reward_std": 0.20420301407575608,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.809375,
+      "step": 1390
+    },
+    {
+      "completion_length": 58.3,
+      "epoch": 1.4928,
+      "grad_norm": 0.1803148239850998,
+      "kl": 1.1746826171875,
+      "learning_rate": 0.00017714751246593197,
+      "loss": 0.1374,
+      "reward": 0.85625,
+      "reward_std": 0.09665063470602035,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.84375,
+      "step": 1400
+    },
+    {
+      "completion_length": 51.30625,
+      "epoch": 1.5034666666666667,
+      "grad_norm": 0.12448029220104218,
+      "kl": 1.7854248046875,
+      "learning_rate": 0.00017531289511575425,
+      "loss": 0.2174,
+      "reward": 0.940625,
+      "reward_std": 0.17032372057437897,
+      "rewards/accuracy_reward": 0.04375,
+      "rewards/format_reward": 0.896875,
+      "step": 1410
+    },
+    {
+      "completion_length": 50.446875,
+      "epoch": 1.5141333333333333,
+      "grad_norm": 0.1189781054854393,
+      "kl": 1.4447998046875,
+      "learning_rate": 0.0001734743717108699,
+      "loss": 0.1672,
+      "reward": 0.959375,
+      "reward_std": 0.13282372057437897,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.921875,
+      "step": 1420
+    },
+    {
+      "completion_length": 55.278125,
+      "epoch": 1.5248,
+      "grad_norm": 0.10093328356742859,
+      "kl": 1.5136474609375,
+      "learning_rate": 0.0001716322259554132,
+      "loss": 0.1768,
+      "reward": 0.96875,
+      "reward_std": 0.19523502588272096,
+      "rewards/accuracy_reward": 0.065625,
+      "rewards/format_reward": 0.903125,
+      "step": 1430
+    },
+    {
+      "completion_length": 59.934375,
+      "epoch": 1.5354666666666668,
+      "grad_norm": 0.2610551714897156,
+      "kl": 1.778369140625,
+      "learning_rate": 0.00016978674211248673,
+      "loss": 0.2314,
+      "reward": 0.84375,
+      "reward_std": 0.20420301407575608,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.8125,
+      "step": 1440
+    },
+    {
+      "completion_length": 54.96875,
+      "epoch": 1.5461333333333334,
+      "grad_norm": 0.19814546406269073,
+      "kl": 2.6193603515625,
+      "learning_rate": 0.00016793820496029623,
+      "loss": 0.3738,
+      "reward": 0.88125,
+      "reward_std": 0.2356409251689911,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.85625,
+      "step": 1450
+    },
+    {
+      "completion_length": 51.703125,
+      "epoch": 1.5568,
+      "grad_norm": 0.1247173473238945,
+      "kl": 2.2877197265625,
+      "learning_rate": 0.000166086899748206,
+      "loss": 0.236,
+      "reward": 0.940625,
+      "reward_std": 0.2197028651833534,
+      "rewards/accuracy_reward": 0.053125,
+      "rewards/format_reward": 0.8875,
+      "step": 1460
+    },
+    {
+      "completion_length": 51.096875,
+      "epoch": 1.5674666666666668,
+      "grad_norm": 0.16843904554843903,
+      "kl": 1.2693115234375,
+      "learning_rate": 0.0001642331121527223,
+      "loss": 0.1801,
+      "reward": 0.959375,
+      "reward_std": 0.14867308586835862,
+      "rewards/accuracy_reward": 0.04375,
+      "rewards/format_reward": 0.915625,
+      "step": 1470
+    },
+    {
+      "completion_length": 55.465625,
+      "epoch": 1.5781333333333334,
+      "grad_norm": 0.12530925869941711,
+      "kl": 1.403857421875,
+      "learning_rate": 0.0001623771282334099,
+      "loss": 0.1621,
+      "reward": 0.921875,
+      "reward_std": 0.17568152397871017,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.875,
+      "step": 1480
+    },
+    {
+      "completion_length": 58.23125,
+      "epoch": 1.5888,
+      "grad_norm": 0.016702894121408463,
+      "kl": 0.8815673828125,
+      "learning_rate": 0.00016051923438875035,
+      "loss": 0.0918,
+      "reward": 0.8875,
+      "reward_std": 0.0879347175359726,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.853125,
+      "step": 1490
+    },
+    {
+      "completion_length": 53.565625,
+      "epoch": 1.5994666666666668,
+      "grad_norm": 0.09671846032142639,
+      "kl": 0.7630126953125,
+      "learning_rate": 0.00015865971731194738,
+      "loss": 0.0861,
+      "reward": 0.98125,
+      "reward_std": 0.11971687823534012,
+      "rewards/accuracy_reward": 0.040625,
+      "rewards/format_reward": 0.940625,
+      "step": 1500
+    },
+    {
+      "completion_length": 57.234375,
+      "epoch": 1.6101333333333332,
+      "grad_norm": 0.16410210728645325,
+      "kl": 0.922265625,
+      "learning_rate": 0.00015679886394668707,
+      "loss": 0.1231,
+      "reward": 0.925,
+      "reward_std": 0.10915063470602035,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.896875,
+      "step": 1510
+    },
+    {
+      "completion_length": 95.821875,
+      "epoch": 1.6208,
+      "grad_norm": 0.09543804824352264,
+      "kl": 6.0548583984375,
+      "learning_rate": 0.00015493696144285935,
+      "loss": 0.4094,
+      "reward": 0.284375,
+      "reward_std": 0.2681046098470688,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.259375,
+      "step": 1520
+    },
+    {
+      "completion_length": 113.446875,
+      "epoch": 1.6314666666666666,
+      "grad_norm": 0.006384687032550573,
+      "kl": 0.34674072265625,
+      "learning_rate": 0.00015307429711224754,
+      "loss": 0.0323,
+      "reward": 0.05,
+      "reward_std": 0.08221687823534012,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.01875,
+      "step": 1530
+    },
+    {
+      "completion_length": 116.7,
+      "epoch": 1.6421333333333332,
+      "grad_norm": 0.03033365309238434,
+      "kl": 0.18465576171875,
+      "learning_rate": 0.0001512111583841933,
+      "loss": 0.0367,
+      "reward": 0.053125,
+      "reward_std": 0.10625,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.025,
+      "step": 1540
+    },
+    {
+      "completion_length": 117.84375,
+      "epoch": 1.6528,
+      "grad_norm": 0.007193129975348711,
+      "kl": 0.1654541015625,
+      "learning_rate": 0.00014934783276124278,
+      "loss": 0.0284,
+      "reward": 0.059375,
+      "reward_std": 0.08318375647068024,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.034375,
+      "step": 1550
+    },
+    {
+      "completion_length": 111.459375,
+      "epoch": 1.6634666666666666,
+      "grad_norm": 0.024860132485628128,
+      "kl": 0.17579345703125,
+      "learning_rate": 0.00014748460777478208,
+      "loss": 0.0751,
+      "reward": 0.13125,
+      "reward_std": 0.20580126941204072,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.121875,
+      "step": 1560
+    },
+    {
+      "completion_length": 91.90625,
+      "epoch": 1.6741333333333333,
+      "grad_norm": 0.07996781170368195,
+      "kl": 0.19302978515625,
+      "learning_rate": 0.00014562177094066812,
+      "loss": 0.1666,
+      "reward": 0.478125,
+      "reward_std": 0.42759600281715393,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.45625,
+      "step": 1570
+    },
+    {
+      "completion_length": 73.571875,
+      "epoch": 1.6848,
+      "grad_norm": 0.07270823419094086,
+      "kl": 0.2430419921875,
+      "learning_rate": 0.0001437596097148615,
+      "loss": 0.1744,
+      "reward": 0.76875,
+      "reward_std": 0.332449671626091,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.740625,
+      "step": 1580
+    },
+    {
+      "completion_length": 64.4875,
+      "epoch": 1.6954666666666667,
+      "grad_norm": 0.1185784786939621,
+      "kl": 0.27333984375,
+      "learning_rate": 0.00014189841144906926,
+      "loss": 0.1684,
+      "reward": 0.80625,
+      "reward_std": 0.28343056291341784,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.778125,
+      "step": 1590
+    },
+    {
+      "completion_length": 59.8375,
+      "epoch": 1.7061333333333333,
+      "grad_norm": 0.30180656909942627,
+      "kl": 0.5548828125,
+      "learning_rate": 0.00014003846334640323,
+      "loss": 0.2054,
+      "reward": 0.740625,
+      "reward_std": 0.29256718456745145,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.728125,
+      "step": 1600
+    },
+    {
+      "completion_length": 59.284375,
+      "epoch": 1.7168,
+      "grad_norm": 0.26433998346328735,
+      "kl": 2.827392578125,
+      "learning_rate": 0.00013818005241706145,
+      "loss": 0.469,
+      "reward": 0.70625,
+      "reward_std": 0.33853629529476165,
+      "rewards/accuracy_reward": 0.00625,
+      "rewards/format_reward": 0.7,
+      "step": 1610
+    },
+    {
+      "completion_length": 53.78125,
+      "epoch": 1.7274666666666667,
+      "grad_norm": 0.2971569299697876,
+      "kl": 3.8951171875,
+      "learning_rate": 0.00013632346543403947,
+      "loss": 0.451,
+      "reward": 0.81875,
+      "reward_std": 0.23273502588272094,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.80625,
+      "step": 1620
+    },
+    {
+      "completion_length": 53.903125,
+      "epoch": 1.7381333333333333,
+      "grad_norm": 0.0691404640674591,
+      "kl": 1.3744140625,
+      "learning_rate": 0.00013446898888887804,
+      "loss": 0.1657,
+      "reward": 0.93125,
+      "reward_std": 0.13943375647068024,
+      "rewards/accuracy_reward": 0.015625,
+      "rewards/format_reward": 0.915625,
+      "step": 1630
+    },
+    {
+      "completion_length": 57.153125,
+      "epoch": 1.7488000000000001,
+      "grad_norm": 0.03758076950907707,
+      "kl": 0.640966796875,
+      "learning_rate": 0.00013261690894745442,
+      "loss": 0.0775,
+      "reward": 0.884375,
+      "reward_std": 0.07596687823534012,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.871875,
+      "step": 1640
+    },
+    {
+      "completion_length": 58.0125,
+      "epoch": 1.7594666666666665,
+      "grad_norm": 0.009222053922712803,
+      "kl": 1.1447265625,
+      "learning_rate": 0.00013076751140582394,
+      "loss": 0.1472,
+      "reward": 0.88125,
+      "reward_std": 0.14963996410369873,
+      "rewards/accuracy_reward": 0.05,
+      "rewards/format_reward": 0.83125,
+      "step": 1650
+    },
+    {
+      "completion_length": 55.746875,
+      "epoch": 1.7701333333333333,
+      "grad_norm": 0.029693789780139923,
+      "kl": 1.351123046875,
+      "learning_rate": 0.00012892108164611857,
+      "loss": 0.161,
+      "reward": 0.890625,
+      "reward_std": 0.14867308586835862,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.86875,
+      "step": 1660
+    },
+    {
+      "completion_length": 53.184375,
+      "epoch": 1.7808000000000002,
+      "grad_norm": 0.23443636298179626,
+      "kl": 1.46533203125,
+      "learning_rate": 0.00012707790459250904,
+      "loss": 0.1583,
+      "reward": 0.9625,
+      "reward_std": 0.14665063470602036,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.928125,
+      "step": 1670
+    },
+    {
+      "completion_length": 52.959375,
+      "epoch": 1.7914666666666665,
+      "grad_norm": 0.010363437235355377,
+      "kl": 1.73623046875,
+      "learning_rate": 0.0001252382646672384,
+      "loss": 0.1757,
+      "reward": 0.921875,
+      "reward_std": 0.16504059880971908,
+      "rewards/accuracy_reward": 0.021875,
+      "rewards/format_reward": 0.9,
+      "step": 1680
+    },
+    {
+      "completion_length": 55.971875,
+      "epoch": 1.8021333333333334,
+      "grad_norm": 0.0745161771774292,
+      "kl": 1.418701171875,
+      "learning_rate": 0.00012340244574673238,
+      "loss": 0.1882,
+      "reward": 0.91875,
+      "reward_std": 0.1851816728711128,
+      "rewards/accuracy_reward": 0.046875,
+      "rewards/format_reward": 0.871875,
+      "step": 1690
+    },
+    {
+      "completion_length": 57.903125,
+      "epoch": 1.8128,
+      "grad_norm": 0.09005508571863174,
+      "kl": 1.6597900390625,
+      "learning_rate": 0.000121570731117794,
+      "loss": 0.1998,
+      "reward": 0.8875,
+      "reward_std": 0.1477808892726898,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.86875,
+      "step": 1700
+    },
+    {
+      "completion_length": 58.240625,
+      "epoch": 1.8234666666666666,
+      "grad_norm": 0.09877178072929382,
+      "kl": 1.344873046875,
+      "learning_rate": 0.00011974340343388972,
+      "loss": 0.163,
+      "reward": 0.921875,
+      "reward_std": 0.18511751294136047,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.8875,
+      "step": 1710
+    },
+    {
+      "completion_length": 66.65,
+      "epoch": 1.8341333333333334,
+      "grad_norm": 0.09796544909477234,
+      "kl": 2.161865234375,
+      "learning_rate": 0.00011792074467153248,
+      "loss": 0.2649,
+      "reward": 0.81875,
+      "reward_std": 0.20843056291341783,
+      "rewards/accuracy_reward": 0.034375,
+      "rewards/format_reward": 0.784375,
+      "step": 1720
+    },
+    {
+      "completion_length": 65.9,
+      "epoch": 1.8448,
+      "grad_norm": 0.12498176097869873,
+      "kl": 2.451318359375,
+      "learning_rate": 0.00011610303608677008,
+      "loss": 0.3047,
+      "reward": 0.809375,
+      "reward_std": 0.268188039958477,
+      "rewards/accuracy_reward": 0.040625,
+      "rewards/format_reward": 0.76875,
+      "step": 1730
+    },
+    {
+      "completion_length": 61.346875,
+      "epoch": 1.8554666666666666,
+      "grad_norm": 0.40386486053466797,
+      "kl": 3.258935546875,
+      "learning_rate": 0.00011429055817178411,
+      "loss": 0.3857,
+      "reward": 0.846875,
+      "reward_std": 0.28369315564632414,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.81875,
+      "step": 1740
+    },
+    {
+      "completion_length": 58.859375,
+      "epoch": 1.8661333333333334,
+      "grad_norm": 0.12184485048055649,
+      "kl": 3.230029296875,
+      "learning_rate": 0.00011248359061160698,
+      "loss": 0.3751,
+      "reward": 0.86875,
+      "reward_std": 0.2617799282073975,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.8375,
+      "step": 1750
+    },
+    {
+      "completion_length": 57.69375,
+      "epoch": 1.8768,
+      "grad_norm": 0.1783817708492279,
+      "kl": 2.4540283203125,
+      "learning_rate": 0.00011068241224096347,
+      "loss": 0.2785,
+      "reward": 0.875,
+      "reward_std": 0.20430223047733306,
+      "rewards/accuracy_reward": 0.01875,
+      "rewards/format_reward": 0.85625,
+      "step": 1760
+    },
+    {
+      "completion_length": 64.459375,
+      "epoch": 1.8874666666666666,
+      "grad_norm": 0.038270145654678345,
+      "kl": 2.579345703125,
+      "learning_rate": 0.00010888730100124353,
+      "loss": 0.2952,
+      "reward": 0.815625,
+      "reward_std": 0.24689744114875795,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.790625,
+      "step": 1770
+    },
+    {
+      "completion_length": 61.403125,
+      "epoch": 1.8981333333333335,
+      "grad_norm": 0.07670488953590393,
+      "kl": 2.2768310546875,
+      "learning_rate": 0.00010709853389761286,
+      "loss": 0.3084,
+      "reward": 0.884375,
+      "reward_std": 0.23545301407575608,
+      "rewards/accuracy_reward": 0.0375,
+      "rewards/format_reward": 0.846875,
+      "step": 1780
+    },
+    {
+      "completion_length": 62.98125,
+      "epoch": 1.9088,
+      "grad_norm": 0.21110066771507263,
+      "kl": 3.0247314453125,
+      "learning_rate": 0.00010531638695626811,
+      "loss": 0.3866,
+      "reward": 0.8,
+      "reward_std": 0.2520918682217598,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.76875,
+      "step": 1790
+    },
+    {
+      "completion_length": 64.06875,
+      "epoch": 1.9194666666666667,
+      "grad_norm": 0.19934044778347015,
+      "kl": 4.1655029296875,
+      "learning_rate": 0.00010354113518184303,
+      "loss": 0.4661,
+      "reward": 0.784375,
+      "reward_std": 0.2908942475914955,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.75625,
+      "step": 1800
+    },
+    {
+      "completion_length": 57.765625,
+      "epoch": 1.9301333333333335,
+      "grad_norm": 0.07659115642309189,
+      "kl": 1.541015625,
+      "learning_rate": 0.000101773052514972,
+      "loss": 0.1994,
+      "reward": 0.865625,
+      "reward_std": 0.17630237936973572,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.840625,
+      "step": 1810
+    },
+    {
+      "completion_length": 58.328125,
+      "epoch": 1.9407999999999999,
+      "grad_norm": 0.053584493696689606,
+      "kl": 1.616796875,
+      "learning_rate": 0.00010001241179001836,
+      "loss": 0.2072,
+      "reward": 0.88125,
+      "reward_std": 0.15386751294136047,
+      "rewards/accuracy_reward": 0.0125,
+      "rewards/format_reward": 0.86875,
+      "step": 1820
+    },
+    {
+      "completion_length": 59.36875,
+      "epoch": 1.9514666666666667,
+      "grad_norm": 0.06573835760354996,
+      "kl": 1.567236328125,
+      "learning_rate": 9.825948469297301e-05,
+      "loss": 0.2048,
+      "reward": 0.921875,
+      "reward_std": 0.19004059880971907,
+      "rewards/accuracy_reward": 0.053125,
+      "rewards/format_reward": 0.86875,
+      "step": 1830
+    },
+    {
+      "completion_length": 55.890625,
+      "epoch": 1.9621333333333333,
+      "grad_norm": 0.18969739973545074,
+      "kl": 1.7246337890625,
+      "learning_rate": 9.651454171953012e-05,
+      "loss": 0.2224,
+      "reward": 0.946875,
+      "reward_std": 0.1664562076330185,
+      "rewards/accuracy_reward": 0.040625,
+      "rewards/format_reward": 0.90625,
+      "step": 1840
+    },
+    {
+      "completion_length": 60.59375,
+      "epoch": 1.9727999999999999,
+      "grad_norm": 0.04010459780693054,
+      "kl": 2.70771484375,
+      "learning_rate": 9.477785213334706e-05,
+      "loss": 0.3228,
+      "reward": 0.834375,
+      "reward_std": 0.22910557091236114,
+      "rewards/accuracy_reward": 0.025,
+      "rewards/format_reward": 0.809375,
+      "step": 1850
+    },
+    {
+      "completion_length": 63.846875,
+      "epoch": 1.9834666666666667,
+      "grad_norm": 0.2044885903596878,
+      "kl": 3.81484375,
+      "learning_rate": 9.30496839244936e-05,
+      "loss": 0.4808,
+      "reward": 0.753125,
+      "reward_std": 0.307637582719326,
+      "rewards/accuracy_reward": 0.028125,
+      "rewards/format_reward": 0.725,
+      "step": 1860
+    },
+    {
+      "completion_length": 63.05,
+      "epoch": 1.9941333333333333,
+      "grad_norm": 0.32608747482299805,
+      "kl": 2.995166015625,
+      "learning_rate": 9.133030376809867e-05,
+      "loss": 0.4066,
+      "reward": 0.734375,
+      "reward_std": 0.31027562469244,
+      "rewards/accuracy_reward": 0.009375,
+      "rewards/format_reward": 0.725,
+      "step": 1870
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2811,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1876/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e596a9f480193cbd2a4bf4d6fda1f9ed131e1dbe00394449bfdaab1880a0f79
+size 7544

checkpoint-1876/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1876/zero_to_fp32.py ADDED Viewed

	@@ -0,0 +1,674 @@

+#!/usr/bin/env python
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+# DeepSpeed Team
+# This script extracts consolidated fp32 weights from ZeRO stage 1, 2 and 3 DeepSpeed checkpoints. It gets
+# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
+# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
+# application.
+#
+# example:
+#   python zero_to_fp32.py . output_dir/
+#   or
+#   python zero_to_fp32.py . output_dir/ --safe_serialization
+import argparse
+import torch
+import glob
+import math
+import os
+import re
+import json
+from tqdm import tqdm
+from collections import OrderedDict
+from dataclasses import dataclass
+# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
+# DeepSpeed data structures it has to be available in the current python environment.
+from deepspeed.utils import logger
+from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
+                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
+                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
+# Container for the parts of one ``*_model_states.pt`` file that the converter needs.
+# ``parse_model_states`` builds one instance per model-states file.
+# NOTE(review): the ``dict()`` annotations below are dict instances rather than types; they are
+# kept as-is here and have no runtime effect on the dataclass fields.
+@dataclass
+class zero_model_state:
+    # buffer tensors taken from the saved module, keyed by name (converted with ``.float()`` when parsed)
+    buffers: dict()
+    # value stored under PARAM_SHAPES; the merge helpers iterate it as a sequence of {name: shape} mappings
+    param_shapes: dict()
+    # list of [key, value] pairs built from the checkpoint's "shared_params" mapping
+    shared_params: list
+    # value stored under DS_VERSION, or None when that key is absent
+    ds_version: int
+    # {name: shape} of the frozen parameters, or None when the checkpoint has no such entry
+    frozen_param_shapes: dict()
+    # {name: tensor} fragments of the frozen parameters, or None when the checkpoint has no such entry
+    frozen_param_fragments: dict()
+# Verbosity switch read by the helpers in this script; the command-line entry point overwrites it from --debug.
+debug = 0
+# load to cpu
+# (passed as ``map_location`` to the ``torch.load`` calls in this script)
+device = torch.device('cpu')
+def atoi(text):
+    return int(text) if text.isdigit() else text
+def natural_keys(text):
+    '''
+    alist.sort(key=natural_keys) sorts in human order
+    http://nedbatchelder.com/blog/200712/human_sorting.html
+    (See Toothy's implementation in the comments)
+    '''
+    return [atoi(c) for c in re.split(r'(\d+)', text)]
+def get_model_state_file(checkpoint_dir, zero_stage):
+    if not os.path.isdir(checkpoint_dir):
+        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")
+    # there should be only one file
+    if zero_stage <= 2:
+        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
+    elif zero_stage == 3:
+        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
+    if not os.path.exists(file):
+        raise FileNotFoundError(f"can't find model states file at '{file}'")
+    return file
+def get_checkpoint_files(checkpoint_dir, glob_pattern):
+    # XXX: need to test that this simple glob rule works for multi-node setup too
+    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)
+    if len(ckpt_files) == 0:
+        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")
+    return ckpt_files
+def get_optim_files(checkpoint_dir):
+    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")
+def get_model_state_files(checkpoint_dir):
+    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")
+def parse_model_states(files):
+    # Load every model-states file in ``files`` and wrap the pieces needed for reconstruction in a
+    # ``zero_model_state``. Returns the list of those objects in the same order as ``files``.
+    # Raises ValueError when a loaded file has no BUFFER_NAMES entry.
+    # NOTE(review): the checkpoints are pickled, so ``torch.load`` should only see trusted files - confirm
+    # whether a ``weights_only`` setting is needed for the installed torch version.
+    zero_model_states = []
+    for file in files:
+        state_dict = torch.load(file, map_location=device)
+        if BUFFER_NAMES not in state_dict:
+            raise ValueError(f"{file} is not a model state checkpoint")
+        buffer_names = state_dict[BUFFER_NAMES]
+        if debug:
+            print("Found buffers:", buffer_names)
+        # recover just the buffers while restoring them to fp32 if they were saved in fp16
+        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
+        param_shapes = state_dict[PARAM_SHAPES]
+        # collect parameters that are included in param_shapes
+        # (``param_names`` is only built up here; nothing in this function reads it afterwards)
+        param_names = []
+        for s in param_shapes:
+            for name in s.keys():
+                param_names.append(name)
+        # update with frozen parameters
+        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
+        if frozen_param_shapes is not None:
+            if debug:
+                print(f"Found frozen_param_shapes: {frozen_param_shapes}")
+            param_names += list(frozen_param_shapes.keys())
+        # handle shared params
+        # stored as [key, value] pairs; the merge entry points copy state_dict[value] to state_dict[key]
+        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]
+        # both of these are optional in the checkpoint and default to None
+        ds_version = state_dict.get(DS_VERSION, None)
+        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)
+        z_model_state = zero_model_state(buffers=buffers,
+                                         param_shapes=param_shapes,
+                                         shared_params=shared_params,
+                                         ds_version=ds_version,
+                                         frozen_param_shapes=frozen_param_shapes,
+                                         frozen_param_fragments=frozen_param_fragments)
+        zero_model_states.append(z_model_state)
+    return zero_model_states
+def parse_optim_states(files, ds_checkpoint_dir):
+    # Load every optimizer-states file in ``files`` and return the tuple
+    # ``(zero_stage, world_size, fp32_flat_groups)`` where ``fp32_flat_groups`` has one entry per file.
+    # ``ds_checkpoint_dir`` is only used in the error message below.
+    # Raises ValueError when the first file has no ZERO_STAGE entry, when the number of files does
+    # not match the recorded partition count, or when the stage is neither <= 2 nor 3.
+    total_files = len(files)
+    state_dicts = []
+    for f in files:
+        state_dict = torch.load(f, map_location=device)
+        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
+        # and also handle the case where it was already removed by another helper script
+        # (this pops the key "optimizer_state_dict" from inside the dict stored under that same key;
+        # the ``None`` default makes a missing inner key a no-op)
+        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
+        state_dicts.append(state_dict)
+    # stage and partition count are read from the first file only
+    if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]:
+        raise ValueError(f"{files[0]} is not a zero checkpoint")
+    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
+    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]
+    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
+    # parameters can be different from data parallelism for non-expert parameters. So we can just
+    # use the max of the partition_count to get the dp world_size.
+    if type(world_size) is list:
+        world_size = max(world_size)
+    # one optimizer-states file is expected per partition
+    if world_size != total_files:
+        raise ValueError(
+            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
+            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
+        )
+    # the groups are named differently in each stage
+    if zero_stage <= 2:
+        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
+    elif zero_stage == 3:
+        fp32_groups_key = FP32_FLAT_GROUPS
+    else:
+        raise ValueError(f"unknown zero stage {zero_stage}")
+    if zero_stage <= 2:
+        # stages <= 2: keep each file's stored value untouched (indexed per param group by the zero2 merge code)
+        fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))]
+    elif zero_stage == 3:
+        # if there is more than one param group, there will be multiple flattened tensors - one
+        # flattened tensor per group - for simplicity merge them into a single tensor
+        #
+        # XXX: could make the script more memory efficient for when there are multiple groups - it
+        # will require matching the sub-lists of param_shapes for each param group flattened tensor
+        fp32_flat_groups = [
+            torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts))
+        ]
+    return zero_stage, world_size, fp32_flat_groups
+def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
+    """
+    Returns fp32 state_dict reconstructed from ds checkpoint
+    Args:
+        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
+    """
+    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")
+    optim_files = get_optim_files(ds_checkpoint_dir)
+    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
+    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")
+    model_files = get_model_state_files(ds_checkpoint_dir)
+    zero_model_states = parse_model_states(model_files)
+    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')
+    if zero_stage <= 2:
+        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                          exclude_frozen_parameters)
+    elif zero_stage == 3:
+        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                          exclude_frozen_parameters)
+def _zero2_merge_frozen_params(state_dict, zero_model_states):
+    # Copy the frozen parameters recorded in the first model state into ``state_dict`` (modified in
+    # place, nothing is returned). Does nothing when there are no frozen parameter shapes.
+    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+        return
+    # only the first model state is consulted here
+    frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+    frozen_param_fragments = zero_model_states[0].frozen_param_fragments
+    if debug:
+        num_elem = sum(s.numel() for s in frozen_param_shapes.values())
+        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+        wanted_params = len(frozen_param_shapes)
+        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+        avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
+        print(f'Frozen params: Have {avail_numel} numels to process.')
+        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+    # running totals, used only for the summary line printed at the end
+    total_params = 0
+    total_numel = 0
+    for name, shape in frozen_param_shapes.items():
+        total_params += 1
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        # the stored fragment is used as-is; no reshaping or concatenation happens on this path
+        state_dict[name] = frozen_param_fragments[name]
+        if debug:
+            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+def _has_callable(obj, fn):
+    attr = getattr(obj, fn, None)
+    return callable(attr)
+def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+    # Rebuild every trainable parameter from the per-rank fp32 partitions and store it in
+    # ``state_dict`` (modified in place, nothing is returned).
+    # ``fp32_flat_groups[rank][group]`` is indexed per rank and per param group below.
+    # Raises ValueError when, after alignment, the consumed element count of a group does not match
+    # the number of available elements.
+    param_shapes = zero_model_states[0].param_shapes
+    # Reconstruction protocol:
+    #
+    # XXX: document this
+    if debug:
+        for i in range(world_size):
+            for j in range(len(fp32_flat_groups[0])):
+                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")
+    # XXX: memory usage doubles here (zero2)
+    num_param_groups = len(fp32_flat_groups[0])
+    merged_single_partition_of_fp32_groups = []
+    for i in range(num_param_groups):
+        # concatenate group i across all ranks into one flat vector
+        merged_partitions = [sd[i] for sd in fp32_flat_groups]
+        full_single_fp32_vector = torch.cat(merged_partitions, 0)
+        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
+    avail_numel = sum(
+        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])
+    if debug:
+        wanted_params = sum([len(shapes) for shapes in param_shapes])
+        wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
+        # not asserting if there is a mismatch due to possible padding
+        print(f"Have {avail_numel} numels to process.")
+        print(f"Need {wanted_numel} numels in {wanted_params} params.")
+    # params
+    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+    # out-of-core computing solution
+    total_numel = 0
+    total_params = 0
+    # param_shapes and the merged vectors are walked in lockstep: one shapes mapping per param group
+    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
+        offset = 0
+        # from here on ``avail_numel`` refers to the current group only
+        avail_numel = full_single_fp32_vector.numel()
+        for name, shape in shapes.items():
+            # shapes without a ``numel`` method are treated as plain sequences of dimensions
+            unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)
+            total_numel += unpartitioned_numel
+            total_params += 1
+            if debug:
+                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+            # take the next ``unpartitioned_numel`` elements of the flat vector and view them as ``shape``
+            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
+            offset += unpartitioned_numel
+        # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
+        # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
+        # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
+        # live optimizer object, so we are checking that the numbers are within the right range
+        align_to = 2 * world_size
+        def zero2_align(x):
+            # round ``x`` up to the next multiple of ``align_to``
+            return align_to * math.ceil(x / align_to)
+        if debug:
+            print(f"original offset={offset}, avail_numel={avail_numel}")
+        offset = zero2_align(offset)
+        avail_numel = zero2_align(avail_numel)
+        if debug:
+            print(f"aligned  offset={offset}, avail_numel={avail_numel}")
+        # Sanity check
+        if offset != avail_numel:
+            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
+def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                               exclude_frozen_parameters):
+    state_dict = OrderedDict()
+    # buffers
+    buffers = zero_model_states[0].buffers
+    state_dict.update(buffers)
+    if debug:
+        print(f"added {len(buffers)} buffers")
+    if not exclude_frozen_parameters:
+        _zero2_merge_frozen_params(state_dict, zero_model_states)
+    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+    # recover shared parameters
+    for pair in zero_model_states[0].shared_params:
+        if pair[1] in state_dict:
+            state_dict[pair[0]] = state_dict[pair[1]]
+    return state_dict
+def zero3_partitioned_param_info(unpartitioned_numel, world_size):
+    remainder = unpartitioned_numel % world_size
+    padding_numel = (world_size - remainder) if remainder else 0
+    partitioned_numel = math.ceil(unpartitioned_numel / world_size)
+    return partitioned_numel, padding_numel
+def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
+    # Rebuild each frozen parameter from the fragments held by every model state and store it in
+    # ``state_dict`` (modified in place, nothing is returned). Does nothing when the first model
+    # state has no frozen parameter shapes.
+    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+        return
+    if debug:
+        for i in range(world_size):
+            num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
+            print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+        frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+        wanted_params = len(frozen_param_shapes)
+        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+        avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size
+        print(f'Frozen params: Have {avail_numel} numels to process.')
+        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+    # running totals, used only for the summary line printed at the end
+    total_params = 0
+    total_numel = 0
+    for name, shape in zero_model_states[0].frozen_param_shapes.items():
+        total_params += 1
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        # gather this parameter's fragment from every model state, in list order
+        param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
+        # concatenate, keep only the first ``unpartitioned_numel`` elements (drops trailing padding), reshape
+        state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)
+        # computed for the debug message below only
+        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+        if debug:
+            print(
+                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+            )
+    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+    # Rebuild every trainable parameter from the per-rank flat fp32 tensors and store it in
+    # ``state_dict`` (modified in place, nothing is returned).
+    # ``fp32_flat_groups[rank]`` is used as a single flat tensor per rank.
+    # Raises ValueError when the number of consumed elements differs from the available count.
+    param_shapes = zero_model_states[0].param_shapes
+    # available elements are estimated from rank 0's tensor size times the number of ranks
+    avail_numel = fp32_flat_groups[0].numel() * world_size
+    # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
+    # param, re-consolidating each param, while dealing with padding if any
+    # merge list of dicts, preserving order
+    param_shapes = {k: v for d in param_shapes for k, v in d.items()}
+    if debug:
+        for i in range(world_size):
+            print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}")
+        wanted_params = len(param_shapes)
+        wanted_numel = sum(shape.numel() for shape in param_shapes.values())
+        # not asserting if there is a mismatch due to possible padding
+        avail_numel = fp32_flat_groups[0].numel() * world_size
+        print(f"Trainable params: Have {avail_numel} numels to process.")
+        print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")
+    # params
+    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+    # out-of-core computing solution
+    # ``offset`` is a position inside each rank's flat tensor, shared by all ranks
+    offset = 0
+    total_numel = 0
+    total_params = 0
+    for name, shape in tqdm(param_shapes.items(), desc='Gathering Sharded Weights'):
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        total_params += 1
+        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+        if debug:
+            print(
+                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+            )
+        # XXX: memory usage doubles here
+        # take ``partitioned_numel`` elements at ``offset`` from every rank, concatenate them in rank
+        # order, keep the first ``unpartitioned_numel`` elements (drops trailing padding), reshape
+        state_dict[name] = torch.cat(
+            tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)),
+            0).narrow(0, 0, unpartitioned_numel).view(shape)
+        offset += partitioned_numel
+    # convert the per-rank position into a total element count so it is comparable with ``avail_numel``
+    offset *= world_size
+    # Sanity check
+    if offset != avail_numel:
+        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
+def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                               exclude_frozen_parameters):
+    state_dict = OrderedDict()
+    # buffers
+    buffers = zero_model_states[0].buffers
+    state_dict.update(buffers)
+    if debug:
+        print(f"added {len(buffers)} buffers")
+    if not exclude_frozen_parameters:
+        _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)
+    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+    # recover shared parameters
+    for pair in zero_model_states[0].shared_params:
+        if pair[1] in state_dict:
+            state_dict[pair[0]] = state_dict[pair[1]]
+    return state_dict
+def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
+    """
+    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
+    ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
+    via a model hub.
+    Args:
+        - ``checkpoint_dir``: path to the desired checkpoint folder
+        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``
+        - ``exclude_frozen_parameters``: exclude frozen parameters
+    Returns:
+        - pytorch ``state_dict``
+    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
+    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
+    the checkpoint.
+    A typical usage might be ::
+        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
+        # do the training and checkpoint saving
+        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
+        model = model.cpu() # move to cpu
+        model.load_state_dict(state_dict)
+        # submit to model hub or save the model to share with others
+    In this example the ``model`` will no longer be usable in the deepspeed context of the same
+    application. i.e. you will need to re-initialize the deepspeed engine, since
+    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.
+    """
+    if tag is None:
+        latest_path = os.path.join(checkpoint_dir, 'latest')
+        if os.path.isfile(latest_path):
+            with open(latest_path, 'r') as fd:
+                tag = fd.read().strip()
+        else:
+            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
+    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
+    if not os.path.isdir(ds_checkpoint_dir):
+        raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
+    return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)
+def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir,
+                                               output_dir,
+                                               max_shard_size="5GB",
+                                               safe_serialization=False,
+                                               tag=None,
+                                               exclude_frozen_parameters=False):
+    """
+    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
+    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.
+    Args:
+        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+        - ``output_dir``: directory to the pytorch fp32 state_dict output files
+        - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB
+        - ``safe_serialization``:  whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
+        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+        - ``exclude_frozen_parameters``: exclude frozen parameters
+    """
+    # Dependency pre-check
+    if safe_serialization:
+        try:
+            from safetensors.torch import save_file
+        except ImportError:
+            print('If you want to use `safe_serialization`, please `pip install safetensors`')
+            raise
+    if max_shard_size is not None:
+        try:
+            from huggingface_hub import split_torch_state_dict_into_shards
+        except ImportError:
+            print('If you want to use `max_shard_size`, please `pip install huggingface_hub`')
+            raise
+    # Convert zero checkpoint to state_dict
+    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters)
+    # Shard the model if it is too big.
+    weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin"
+    if max_shard_size is not None:
+        filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
+        state_dict_split = split_torch_state_dict_into_shards(state_dict,
+                                                              filename_pattern=filename_pattern,
+                                                              max_shard_size=max_shard_size)
+    else:
+        from collections import namedtuple
+        StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"])
+        state_dict_split = StateDictSplit(is_sharded=False,
+                                          filename_to_tensors={weights_name: list(state_dict.keys())})
+    # Save the model
+    filename_to_tensors = state_dict_split.filename_to_tensors.items()
+    for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"):
+        shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors}
+        output_path = os.path.join(output_dir, shard_file)
+        if safe_serialization:
+            save_file(shard, output_path, metadata={"format": "pt"})
+        else:
+            torch.save(shard, output_path)
+    # Save index if sharded
+    if state_dict_split.is_sharded:
+        index = {
+            "metadata": state_dict_split.metadata,
+            "weight_map": state_dict_split.tensor_to_filename,
+        }
+        save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json"
+        save_index_file = os.path.join(output_dir, save_index_file)
+        with open(save_index_file, "w", encoding="utf-8") as f:
+            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
+            f.write(content)
+def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
+    """
+    1. Put the provided model to cpu
+    2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
+    3. Load it into the provided model
+    Args:
+        - ``model``: the model object to update
+        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+    Returns:
+        - ``model`: modified model
+    Make sure you have plenty of CPU memory available before you call this function. If you don't
+    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
+    conveniently placed for you in the checkpoint folder.
+    A typical usage might be ::
+        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
+        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
+        # submit to model hub or save the model to share with others
+    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
+    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
+    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+    """
+    logger.info(f"Extracting fp32 weights")
+    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
+    logger.info(f"Overwriting model with fp32 weights")
+    model = model.cpu()
+    model.load_state_dict(state_dict, strict=False)
+    return model
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("checkpoint_dir",
+                        type=str,
+                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
+    parser.add_argument("output_dir",
+                        type=str,
+                        help="directory to the pytorch fp32 state_dict output files"
+                        "(e.g. path/checkpoint-12-output/)")
+    parser.add_argument(
+        "--max_shard_size",
+        type=str,
+        default="5GB",
+        help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size"
+        "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`"
+        "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances"
+        "without CPU OOM issues.")
+    parser.add_argument(
+        "--safe_serialization",
+        default=False,
+        action='store_true',
+        help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).")
+    parser.add_argument("-t",
+                        "--tag",
+                        type=str,
+                        default=None,
+                        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
+    parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
+    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
+    args = parser.parse_args()
+    debug = args.debug
+    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
+                                               args.output_dir,
+                                               max_shard_size=args.max_shard_size,
+                                               safe_serialization=args.safe_serialization,
+                                               tag=args.tag,
+                                               exclude_frozen_parameters=args.exclude_frozen_parameters)

checkpoint-2811/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Qwen/Qwen2.5-1.5B
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-2811/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-1.5B",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "embed_tokens",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-2811/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0e2d12773cde23612f66f956756bfff79b5088a590085701d068e152e8b9f0d
+size 488520640

checkpoint-2811/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-2811/global_step2810/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd7034c580e6acd4b359e2cea8d943b8921122f1d8c1cc9cb50dc1a4196b681
+size 130520624

checkpoint-2811/global_step2810/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ded2b4ee29d3c0bb6b96ec7a9d4e62d8b04f5fdea19018fb908b35ad044528d1
+size 488645432

checkpoint-2811/latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ global_step2810

checkpoint-2811/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-2811/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6647a496131c714e6f25c6ed38080948283f93dbdaa708df34a03fc09a51826
+size 14244

checkpoint-2811/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a702cef1bbbaf4b23cf4944bfd88935ecce43dafe80ceae33d9d07e5c46a0fa
+size 1064

checkpoint-2811/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-2811/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
+size 11422063

checkpoint-2811/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,209 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- 
'<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

checkpoint-2811/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-2811/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e596a9f480193cbd2a4bf4d6fda1f9ed131e1dbe00394449bfdaab1880a0f79
+size 7544

checkpoint-2811/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-2811/zero_to_fp32.py ADDED Viewed

	@@ -0,0 +1,674 @@

+#!/usr/bin/env python
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+# DeepSpeed Team
+# This script extracts fp32 consolidated weights from ZeRO stage 1, 2 and 3 DeepSpeed checkpoints. It gets
+# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
+# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
+# application.
+#
+# example:
+#   python zero_to_fp32.py . output_dir/
+#   or
+#   python zero_to_fp32.py . output_dir/ --safe_serialization
+import argparse
+import torch
+import glob
+import math
+import os
+import re
+import json
+from tqdm import tqdm
+from collections import OrderedDict
+from dataclasses import dataclass
+# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
+# DeepSpeed data structures it has to be available in the current python environment.
+from deepspeed.utils import logger
+from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
+                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
+                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
+@dataclass
+class zero_model_state:
+    buffers: dict()
+    param_shapes: dict()
+    shared_params: list
+    ds_version: int
+    frozen_param_shapes: dict()
+    frozen_param_fragments: dict()
+# Module-wide diagnostics switch: the helpers below print extra details whenever this is truthy.
+debug = 0
+# load to cpu
+# (passed as ``map_location`` to ``torch.load`` when the checkpoint files are read)
+device = torch.device('cpu')
+def atoi(text):
+    return int(text) if text.isdigit() else text
+def natural_keys(text):
+    '''
+    alist.sort(key=natural_keys) sorts in human order
+    http://nedbatchelder.com/blog/200712/human_sorting.html
+    (See Toothy's implementation in the comments)
+    '''
+    return [atoi(c) for c in re.split(r'(\d+)', text)]
+def get_model_state_file(checkpoint_dir, zero_stage):
+    if not os.path.isdir(checkpoint_dir):
+        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")
+    # there should be only one file
+    if zero_stage <= 2:
+        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
+    elif zero_stage == 3:
+        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
+    if not os.path.exists(file):
+        raise FileNotFoundError(f"can't find model states file at '{file}'")
+    return file
+def get_checkpoint_files(checkpoint_dir, glob_pattern):
+    # XXX: need to test that this simple glob rule works for multi-node setup too
+    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)
+    if len(ckpt_files) == 0:
+        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")
+    return ckpt_files
+def get_optim_files(checkpoint_dir):
+    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")
+def get_model_state_files(checkpoint_dir):
+    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")
+def parse_model_states(files):
+    zero_model_states = []
+    for file in files:
+        state_dict = torch.load(file, map_location=device)
+        if BUFFER_NAMES not in state_dict:
+            raise ValueError(f"{file} is not a model state checkpoint")
+        buffer_names = state_dict[BUFFER_NAMES]
+        if debug:
+            print("Found buffers:", buffer_names)
+        # recover just the buffers while restoring them to fp32 if they were saved in fp16
+        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
+        param_shapes = state_dict[PARAM_SHAPES]
+        # collect parameters that are included in param_shapes
+        param_names = []
+        for s in param_shapes:
+            for name in s.keys():
+                param_names.append(name)
+        # update with frozen parameters
+        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
+        if frozen_param_shapes is not None:
+            if debug:
+                print(f"Found frozen_param_shapes: {frozen_param_shapes}")
+            param_names += list(frozen_param_shapes.keys())
+        # handle shared params
+        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]
+        ds_version = state_dict.get(DS_VERSION, None)
+        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)
+        z_model_state = zero_model_state(buffers=buffers,
+                                         param_shapes=param_shapes,
+                                         shared_params=shared_params,
+                                         ds_version=ds_version,
+                                         frozen_param_shapes=frozen_param_shapes,
+                                         frozen_param_fragments=frozen_param_fragments)
+        zero_model_states.append(z_model_state)
+    return zero_model_states
+def parse_optim_states(files, ds_checkpoint_dir):
+    total_files = len(files)
+    state_dicts = []
+    for f in files:
+        state_dict = torch.load(f, map_location=device)
+        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
+        # and also handle the case where it was already removed by another helper script
+        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
+        state_dicts.append(state_dict)
+    if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]:
+        raise ValueError(f"{files[0]} is not a zero checkpoint")
+    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
+    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]
+    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
+    # parameters can be different from data parallelism for non-expert parameters. So we can just
+    # use the max of the partition_count to get the dp world_size.
+    if type(world_size) is list:
+        world_size = max(world_size)
+    if world_size != total_files:
+        raise ValueError(
+            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
+            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
+        )
+    # the groups are named differently in each stage
+    if zero_stage <= 2:
+        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
+    elif zero_stage == 3:
+        fp32_groups_key = FP32_FLAT_GROUPS
+    else:
+        raise ValueError(f"unknown zero stage {zero_stage}")
+    if zero_stage <= 2:
+        fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))]
+    elif zero_stage == 3:
+        # if there is more than one param group, there will be multiple flattened tensors - one
+        # flattened tensor per group - for simplicity merge them into a single tensor
+        #
+        # XXX: could make the script more memory efficient for when there are multiple groups - it
+        # will require matching the sub-lists of param_shapes for each param group flattened tensor
+        fp32_flat_groups = [
+            torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts))
+        ]
+    return zero_stage, world_size, fp32_flat_groups
+def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
+    """
+    Returns fp32 state_dict reconstructed from ds checkpoint
+    Args:
+        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
+    """
+    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")
+    optim_files = get_optim_files(ds_checkpoint_dir)
+    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
+    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")
+    model_files = get_model_state_files(ds_checkpoint_dir)
+    zero_model_states = parse_model_states(model_files)
+    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')
+    if zero_stage <= 2:
+        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                          exclude_frozen_parameters)
+    elif zero_stage == 3:
+        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                          exclude_frozen_parameters)
+def _zero2_merge_frozen_params(state_dict, zero_model_states):
+    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+        return
+    frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+    frozen_param_fragments = zero_model_states[0].frozen_param_fragments
+    if debug:
+        num_elem = sum(s.numel() for s in frozen_param_shapes.values())
+        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+        wanted_params = len(frozen_param_shapes)
+        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+        avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
+        print(f'Frozen params: Have {avail_numel} numels to process.')
+        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+    total_params = 0
+    total_numel = 0
+    for name, shape in frozen_param_shapes.items():
+        total_params += 1
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        state_dict[name] = frozen_param_fragments[name]
+        if debug:
+            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+def _has_callable(obj, fn):
+    attr = getattr(obj, fn, None)
+    return callable(attr)
+def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+    """
+    Rebuild the trainable parameters from the flat fp32 partitions and store them in ``state_dict`` (in place).
+    For every param group the partitions of all entries in ``fp32_flat_groups`` are concatenated into one
+    flat vector, which is then sliced and reshaped according to ``param_shapes`` of the first model state.
+    Raises:
+        - ``ValueError`` if, after alignment, the consumed element count differs from the available one
+    """
+    param_shapes = zero_model_states[0].param_shapes
+    # Reconstruction protocol:
+    #
+    # XXX: document this
+    if debug:
+        for i in range(world_size):
+            for j in range(len(fp32_flat_groups[0])):
+                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")
+    # XXX: memory usage doubles here (zero2)
+    num_param_groups = len(fp32_flat_groups[0])
+    merged_single_partition_of_fp32_groups = []
+    for i in range(num_param_groups):
+        # gather the i-th group's partition from every entry and join them end to end
+        merged_partitions = [sd[i] for sd in fp32_flat_groups]
+        full_single_fp32_vector = torch.cat(merged_partitions, 0)
+        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
+    avail_numel = sum(
+        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])
+    if debug:
+        wanted_params = sum([len(shapes) for shapes in param_shapes])
+        wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
+        # not asserting if there is a mismatch due to possible padding
+        print(f"Have {avail_numel} numels to process.")
+        print(f"Need {wanted_numel} numels in {wanted_params} params.")
+    # params
+    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+    # out-of-core computing solution
+    total_numel = 0
+    total_params = 0
+    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
+        # offset walks through this group's flat vector, one parameter at a time
+        offset = 0
+        avail_numel = full_single_fp32_vector.numel()
+        for name, shape in shapes.items():
+            # shapes without a callable ``numel`` fall back to the product of their entries
+            unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)
+            total_numel += unpartitioned_numel
+            total_params += 1
+            if debug:
+                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
+            offset += unpartitioned_numel
+        # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
+        # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
+        # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
+        # live optimizer object, so we are checking that the numbers are within the right range
+        align_to = 2 * world_size
+        def zero2_align(x):
+            # round x up to the next multiple of align_to
+            return align_to * math.ceil(x / align_to)
+        if debug:
+            print(f"original offset={offset}, avail_numel={avail_numel}")
+        offset = zero2_align(offset)
+        avail_numel = zero2_align(avail_numel)
+        if debug:
+            print(f"aligned  offset={offset}, avail_numel={avail_numel}")
+        # Sanity check
+        if offset != avail_numel:
+            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
+def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                               exclude_frozen_parameters):
+    state_dict = OrderedDict()
+    # buffers
+    buffers = zero_model_states[0].buffers
+    state_dict.update(buffers)
+    if debug:
+        print(f"added {len(buffers)} buffers")
+    if not exclude_frozen_parameters:
+        _zero2_merge_frozen_params(state_dict, zero_model_states)
+    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+    # recover shared parameters
+    for pair in zero_model_states[0].shared_params:
+        if pair[1] in state_dict:
+            state_dict[pair[0]] = state_dict[pair[1]]
+    return state_dict
+def zero3_partitioned_param_info(unpartitioned_numel, world_size):
+    remainder = unpartitioned_numel % world_size
+    padding_numel = (world_size - remainder) if remainder else 0
+    partitioned_numel = math.ceil(unpartitioned_numel / world_size)
+    return partitioned_numel, padding_numel
+def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
+    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+        return
+    if debug:
+        for i in range(world_size):
+            num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
+            print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+        frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+        wanted_params = len(frozen_param_shapes)
+        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+        avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size
+        print(f'Frozen params: Have {avail_numel} numels to process.')
+        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+    total_params = 0
+    total_numel = 0
+    for name, shape in zero_model_states[0].frozen_param_shapes.items():
+        total_params += 1
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
+        state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)
+        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+        if debug:
+            print(
+                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+            )
+    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+    param_shapes = zero_model_states[0].param_shapes
+    avail_numel = fp32_flat_groups[0].numel() * world_size
+    # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
+    # param, re-consolidating each param, while dealing with padding if any
+    # merge list of dicts, preserving order
+    param_shapes = {k: v for d in param_shapes for k, v in d.items()}
+    if debug:
+        for i in range(world_size):
+            print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}")
+        wanted_params = len(param_shapes)
+        wanted_numel = sum(shape.numel() for shape in param_shapes.values())
+        # not asserting if there is a mismatch due to possible padding
+        avail_numel = fp32_flat_groups[0].numel() * world_size
+        print(f"Trainable params: Have {avail_numel} numels to process.")
+        print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")
+    # params
+    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+    # out-of-core computing solution
+    offset = 0
+    total_numel = 0
+    total_params = 0
+    for name, shape in tqdm(param_shapes.items(), desc='Gathering Sharded Weights'):
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        total_params += 1
+        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+        if debug:
+            print(
+                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+            )
+        # XXX: memory usage doubles here
+        state_dict[name] = torch.cat(
+            tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)),
+            0).narrow(0, 0, unpartitioned_numel).view(shape)
+        offset += partitioned_numel
+    offset *= world_size
+    # Sanity check
+    if offset != avail_numel:
+        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
+def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                               exclude_frozen_parameters):
+    state_dict = OrderedDict()
+    # buffers
+    buffers = zero_model_states[0].buffers
+    state_dict.update(buffers)
+    if debug:
+        print(f"added {len(buffers)} buffers")
+    if not exclude_frozen_parameters:
+        _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)
+    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+    # recover shared parameters
+    for pair in zero_model_states[0].shared_params:
+        if pair[1] in state_dict:
+            state_dict[pair[0]] = state_dict[pair[1]]
+    return state_dict
+def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
+    """
+    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
+    ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
+    via a model hub.
+    Args:
+        - ``checkpoint_dir``: path to the desired checkpoint folder
+        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``
+        - ``exclude_frozen_parameters``: exclude frozen parameters
+    Returns:
+        - pytorch ``state_dict``
+    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
+    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
+    the checkpoint.
+    A typical usage might be ::
+        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
+        # do the training and checkpoint saving
+        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
+        model = model.cpu() # move to cpu
+        model.load_state_dict(state_dict)
+        # submit to model hub or save the model to share with others
+    In this example the ``model`` will no longer be usable in the deepspeed context of the same
+    application. i.e. you will need to re-initialize the deepspeed engine, since
+    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.
+    """
+    if tag is None:
+        latest_path = os.path.join(checkpoint_dir, 'latest')
+        if os.path.isfile(latest_path):
+            with open(latest_path, 'r') as fd:
+                tag = fd.read().strip()
+        else:
+            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
+    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
+    if not os.path.isdir(ds_checkpoint_dir):
+        raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
+    return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)
+def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir,
+                                               output_dir,
+                                               max_shard_size="5GB",
+                                               safe_serialization=False,
+                                               tag=None,
+                                               exclude_frozen_parameters=False):
+    """
+    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
+    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.
+    Args:
+        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+        - ``output_dir``: directory to the pytorch fp32 state_dict output files
+        - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB
+        - ``safe_serialization``:  whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
+        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+        - ``exclude_frozen_parameters``: exclude frozen parameters
+    """
+    # Dependency pre-check
+    if safe_serialization:
+        try:
+            from safetensors.torch import save_file
+        except ImportError:
+            print('If you want to use `safe_serialization`, please `pip install safetensors`')
+            raise
+    if max_shard_size is not None:
+        try:
+            from huggingface_hub import split_torch_state_dict_into_shards
+        except ImportError:
+            print('If you want to use `max_shard_size`, please `pip install huggingface_hub`')
+            raise
+    # Convert zero checkpoint to state_dict
+    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters)
+    # Shard the model if it is too big.
+    weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin"
+    if max_shard_size is not None:
+        filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
+        state_dict_split = split_torch_state_dict_into_shards(state_dict,
+                                                              filename_pattern=filename_pattern,
+                                                              max_shard_size=max_shard_size)
+    else:
+        from collections import namedtuple
+        StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"])
+        state_dict_split = StateDictSplit(is_sharded=False,
+                                          filename_to_tensors={weights_name: list(state_dict.keys())})
+    # Save the model
+    filename_to_tensors = state_dict_split.filename_to_tensors.items()
+    for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"):
+        shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors}
+        output_path = os.path.join(output_dir, shard_file)
+        if safe_serialization:
+            save_file(shard, output_path, metadata={"format": "pt"})
+        else:
+            torch.save(shard, output_path)
+    # Save index if sharded
+    if state_dict_split.is_sharded:
+        index = {
+            "metadata": state_dict_split.metadata,
+            "weight_map": state_dict_split.tensor_to_filename,
+        }
+        save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json"
+        save_index_file = os.path.join(output_dir, save_index_file)
+        with open(save_index_file, "w", encoding="utf-8") as f:
+            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
+            f.write(content)
+def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
+    """
+    1. Put the provided model to cpu
+    2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
+    3. Load it into the provided model
+    Args:
+        - ``model``: the model object to update
+        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+    Returns:
+        - ``model`: modified model
+    Make sure you have plenty of CPU memory available before you call this function. If you don't
+    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
+    conveniently placed for you in the checkpoint folder.
+    A typical usage might be ::
+        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
+        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
+        # submit to model hub or save the model to share with others
+    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
+    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
+    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+    """
+    logger.info(f"Extracting fp32 weights")
+    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
+    logger.info(f"Overwriting model with fp32 weights")
+    model = model.cpu()
+    model.load_state_dict(state_dict, strict=False)
+    return model
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("checkpoint_dir",
+                        type=str,
+                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
+    parser.add_argument("output_dir",
+                        type=str,
+                        help="directory to the pytorch fp32 state_dict output files"
+                        "(e.g. path/checkpoint-12-output/)")
+    parser.add_argument(
+        "--max_shard_size",
+        type=str,
+        default="5GB",
+        help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size"
+        "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`"
+        "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances"
+        "without CPU OOM issues.")
+    parser.add_argument(
+        "--safe_serialization",
+        default=False,
+        action='store_true',
+        help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).")
+    parser.add_argument("-t",
+                        "--tag",
+                        type=str,
+                        default=None,
+                        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
+    parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
+    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
+    args = parser.parse_args()
+    debug = args.debug
+    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
+                                               args.output_dir,
+                                               max_shard_size=args.max_shard_size,
+                                               safe_serialization=args.safe_serialization,
+                                               tag=args.tag,
+                                               exclude_frozen_parameters=args.exclude_frozen_parameters)

checkpoint-938/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Qwen/Qwen2.5-1.5B
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information is needed to make further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-938/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-1.5B",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "embed_tokens",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-938/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c32b4744a5f1e25fab4fcef69341d68015e77a0722895c2178ac5d0909e2dd89
+size 488520640

checkpoint-938/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-938/global_step937/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c69eca94feda5cb826eb693257e9258b78632b4b266e40c3212f1d5c7800fe2
+size 130520624

checkpoint-938/global_step937/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5112b9399d6c69ae4dd09f6f8f74af27784d1b9795cde3e95246128f650ad458
+size 488645432

checkpoint-938/latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ global_step937

checkpoint-938/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-938/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85ff83c9a62e5d58853a2656865d280c0e257ccdcd65dad6bd3060f966059592
+size 14244

checkpoint-938/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7328f6df045aa7953e963c7065b6c08cd9c4b4a17f305d9186fb192f49d75a3f
+size 1064