{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 126.14259597806216,
  "global_step": 69000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.83,
      "learning_rate": 4.990923217550275e-05,
      "loss": 4.2068,
      "step": 1000
    },
    {
      "epoch": 1.83,
      "eval_loss": 2.99944806098938,
      "eval_runtime": 10.4226,
      "eval_samples_per_second": 35.02,
      "eval_steps_per_second": 5.853,
      "step": 1000
    },
    {
      "epoch": 3.66,
      "learning_rate": 4.981782449725777e-05,
      "loss": 3.0778,
      "step": 2000
    },
    {
      "epoch": 3.66,
      "eval_loss": 2.904747724533081,
      "eval_runtime": 10.18,
      "eval_samples_per_second": 35.855,
      "eval_steps_per_second": 5.992,
      "step": 2000
    },
    {
      "epoch": 5.48,
      "learning_rate": 4.97264168190128e-05,
      "loss": 2.9001,
      "step": 3000
    },
    {
      "epoch": 5.48,
      "eval_loss": 2.847013473510742,
      "eval_runtime": 10.175,
      "eval_samples_per_second": 35.872,
      "eval_steps_per_second": 5.995,
      "step": 3000
    },
    {
      "epoch": 7.31,
      "learning_rate": 4.963500914076782e-05,
      "loss": 2.7675,
      "step": 4000
    },
    {
      "epoch": 7.31,
      "eval_loss": 2.816072463989258,
      "eval_runtime": 9.921,
      "eval_samples_per_second": 36.791,
      "eval_steps_per_second": 6.149,
      "step": 4000
    },
    {
      "epoch": 9.14,
      "learning_rate": 4.9543601462522856e-05,
      "loss": 2.6493,
      "step": 5000
    },
    {
      "epoch": 9.14,
      "eval_loss": 2.8051352500915527,
      "eval_runtime": 9.961,
      "eval_samples_per_second": 36.643,
      "eval_steps_per_second": 6.124,
      "step": 5000
    },
    {
      "epoch": 10.97,
      "learning_rate": 4.9452193784277884e-05,
      "loss": 2.5494,
      "step": 6000
    },
    {
      "epoch": 10.97,
      "eval_loss": 2.7774672508239746,
      "eval_runtime": 9.908,
      "eval_samples_per_second": 36.839,
      "eval_steps_per_second": 6.157,
      "step": 6000
    },
    {
      "epoch": 12.8,
      "learning_rate": 4.936078610603291e-05,
      "loss": 2.4531,
      "step": 7000
    },
    {
      "epoch": 12.8,
      "eval_loss": 2.7776949405670166,
      "eval_runtime": 9.931,
      "eval_samples_per_second": 36.754,
      "eval_steps_per_second": 6.142,
      "step": 7000
    },
    {
      "epoch": 14.63,
      "learning_rate": 4.926937842778794e-05,
      "loss": 2.3665,
      "step": 8000
    },
    {
      "epoch": 14.63,
      "eval_loss": 2.78460955619812,
      "eval_runtime": 9.938,
      "eval_samples_per_second": 36.728,
      "eval_steps_per_second": 6.138,
      "step": 8000
    },
    {
      "epoch": 16.45,
      "learning_rate": 4.917797074954296e-05,
      "loss": 2.2844,
      "step": 9000
    },
    {
      "epoch": 16.45,
      "eval_loss": 2.792381763458252,
      "eval_runtime": 9.915,
      "eval_samples_per_second": 36.813,
      "eval_steps_per_second": 6.152,
      "step": 9000
    },
    {
      "epoch": 18.28,
      "learning_rate": 4.908656307129799e-05,
      "loss": 2.2125,
      "step": 10000
    },
    {
      "epoch": 18.28,
      "eval_loss": 2.820063352584839,
      "eval_runtime": 9.918,
      "eval_samples_per_second": 36.802,
      "eval_steps_per_second": 6.15,
      "step": 10000
    },
    {
      "epoch": 20.11,
      "learning_rate": 4.899515539305301e-05,
      "loss": 2.1433,
      "step": 11000
    },
    {
      "epoch": 20.11,
      "eval_loss": 2.823256015777588,
      "eval_runtime": 9.913,
      "eval_samples_per_second": 36.82,
      "eval_steps_per_second": 6.154,
      "step": 11000
    },
    {
      "epoch": 21.94,
      "learning_rate": 4.890374771480805e-05,
      "loss": 2.0681,
      "step": 12000
    },
    {
      "epoch": 21.94,
      "eval_loss": 2.834426164627075,
      "eval_runtime": 9.923,
      "eval_samples_per_second": 36.783,
      "eval_steps_per_second": 6.147,
      "step": 12000
    },
    {
      "epoch": 23.77,
      "learning_rate": 4.8812340036563075e-05,
      "loss": 2.0057,
      "step": 13000
    },
    {
      "epoch": 23.77,
      "eval_loss": 2.869232177734375,
      "eval_runtime": 9.934,
      "eval_samples_per_second": 36.743,
      "eval_steps_per_second": 6.141,
      "step": 13000
    },
    {
      "epoch": 25.59,
      "learning_rate": 4.87209323583181e-05,
      "loss": 1.9376,
      "step": 14000
    },
    {
      "epoch": 25.59,
      "eval_loss": 2.875586986541748,
      "eval_runtime": 9.917,
      "eval_samples_per_second": 36.805,
      "eval_steps_per_second": 6.151,
      "step": 14000
    },
    {
      "epoch": 27.42,
      "learning_rate": 4.862952468007313e-05,
      "loss": 1.884,
      "step": 15000
    },
    {
      "epoch": 27.42,
      "eval_loss": 2.9035284519195557,
      "eval_runtime": 9.938,
      "eval_samples_per_second": 36.728,
      "eval_steps_per_second": 6.138,
      "step": 15000
    },
    {
      "epoch": 29.25,
      "learning_rate": 4.8538117001828156e-05,
      "loss": 1.8166,
      "step": 16000
    },
    {
      "epoch": 29.25,
      "eval_loss": 2.9383809566497803,
      "eval_runtime": 10.0188,
      "eval_samples_per_second": 36.432,
      "eval_steps_per_second": 6.089,
      "step": 16000
    },
    {
      "epoch": 31.08,
      "learning_rate": 4.8446800731261424e-05,
      "loss": 1.7685,
      "step": 17000
    },
    {
      "epoch": 31.08,
      "eval_loss": 2.959977626800537,
      "eval_runtime": 9.932,
      "eval_samples_per_second": 36.75,
      "eval_steps_per_second": 6.142,
      "step": 17000
    },
    {
      "epoch": 32.91,
      "learning_rate": 4.835539305301646e-05,
      "loss": 1.7101,
      "step": 18000
    },
    {
      "epoch": 32.91,
      "eval_loss": 2.996220588684082,
      "eval_runtime": 9.924,
      "eval_samples_per_second": 36.779,
      "eval_steps_per_second": 6.147,
      "step": 18000
    },
    {
      "epoch": 34.73,
      "learning_rate": 4.826407678244973e-05,
      "loss": 1.6541,
      "step": 19000
    },
    {
      "epoch": 34.73,
      "eval_loss": 3.002917766571045,
      "eval_runtime": 9.937,
      "eval_samples_per_second": 36.731,
      "eval_steps_per_second": 6.139,
      "step": 19000
    },
    {
      "epoch": 36.56,
      "learning_rate": 4.8172669104204753e-05,
      "loss": 1.6023,
      "step": 20000
    },
    {
      "epoch": 36.56,
      "eval_loss": 3.0461058616638184,
      "eval_runtime": 9.937,
      "eval_samples_per_second": 36.731,
      "eval_steps_per_second": 6.139,
      "step": 20000
    },
    {
      "epoch": 38.39,
      "learning_rate": 4.808135283363803e-05,
      "loss": 1.5581,
      "step": 21000
    },
    {
      "epoch": 38.39,
      "eval_loss": 3.0910513401031494,
      "eval_runtime": 9.922,
      "eval_samples_per_second": 36.787,
      "eval_steps_per_second": 6.148,
      "step": 21000
    },
    {
      "epoch": 40.22,
      "learning_rate": 4.79900365630713e-05,
      "loss": 1.5083,
      "step": 22000
    },
    {
      "epoch": 40.22,
      "eval_loss": 3.1346936225891113,
      "eval_runtime": 9.937,
      "eval_samples_per_second": 36.731,
      "eval_steps_per_second": 6.139,
      "step": 22000
    },
    {
      "epoch": 42.05,
      "learning_rate": 4.789862888482633e-05,
      "loss": 1.4624,
      "step": 23000
    },
    {
      "epoch": 42.05,
      "eval_loss": 3.1444737911224365,
      "eval_runtime": 9.916,
      "eval_samples_per_second": 36.809,
      "eval_steps_per_second": 6.152,
      "step": 23000
    },
    {
      "epoch": 43.88,
      "learning_rate": 4.780722120658136e-05,
      "loss": 1.4115,
      "step": 24000
    },
    {
      "epoch": 43.88,
      "eval_loss": 3.140058755874634,
      "eval_runtime": 9.944,
      "eval_samples_per_second": 36.705,
      "eval_steps_per_second": 6.134,
      "step": 24000
    },
    {
      "epoch": 45.7,
      "learning_rate": 4.7715813528336384e-05,
      "loss": 1.3686,
      "step": 25000
    },
    {
      "epoch": 45.7,
      "eval_loss": 3.195690393447876,
      "eval_runtime": 9.91,
      "eval_samples_per_second": 36.831,
      "eval_steps_per_second": 6.155,
      "step": 25000
    },
    {
      "epoch": 47.53,
      "learning_rate": 4.76245886654479e-05,
      "loss": 1.3243,
      "step": 26000
    },
    {
      "epoch": 47.53,
      "eval_loss": 3.271955966949463,
      "eval_runtime": 9.932,
      "eval_samples_per_second": 36.75,
      "eval_steps_per_second": 6.142,
      "step": 26000
    },
    {
      "epoch": 49.36,
      "learning_rate": 4.753318098720293e-05,
      "loss": 1.2869,
      "step": 27000
    },
    {
      "epoch": 49.36,
      "eval_loss": 3.26181697845459,
      "eval_runtime": 9.936,
      "eval_samples_per_second": 36.735,
      "eval_steps_per_second": 6.139,
      "step": 27000
    },
    {
      "epoch": 51.19,
      "learning_rate": 4.7441773308957954e-05,
      "loss": 1.2486,
      "step": 28000
    },
    {
      "epoch": 51.19,
      "eval_loss": 3.3045871257781982,
      "eval_runtime": 9.94,
      "eval_samples_per_second": 36.72,
      "eval_steps_per_second": 6.137,
      "step": 28000
    },
    {
      "epoch": 53.02,
      "learning_rate": 4.735036563071298e-05,
      "loss": 1.2066,
      "step": 29000
    },
    {
      "epoch": 53.02,
      "eval_loss": 3.35994029045105,
      "eval_runtime": 10.2683,
      "eval_samples_per_second": 35.546,
      "eval_steps_per_second": 5.941,
      "step": 29000
    },
    {
      "epoch": 54.84,
      "learning_rate": 4.725895795246801e-05,
      "loss": 1.1644,
      "step": 30000
    },
    {
      "epoch": 54.84,
      "eval_loss": 3.362321376800537,
      "eval_runtime": 9.972,
      "eval_samples_per_second": 36.602,
      "eval_steps_per_second": 6.117,
      "step": 30000
    },
    {
      "epoch": 56.67,
      "learning_rate": 4.716764168190128e-05,
      "loss": 1.1323,
      "step": 31000
    },
    {
      "epoch": 56.67,
      "eval_loss": 3.410172939300537,
      "eval_runtime": 10.015,
      "eval_samples_per_second": 36.445,
      "eval_steps_per_second": 6.091,
      "step": 31000
    },
    {
      "epoch": 58.5,
      "learning_rate": 4.707623400365631e-05,
      "loss": 1.0934,
      "step": 32000
    },
    {
      "epoch": 58.5,
      "eval_loss": 3.4784252643585205,
      "eval_runtime": 9.943,
      "eval_samples_per_second": 36.709,
      "eval_steps_per_second": 6.135,
      "step": 32000
    },
    {
      "epoch": 60.33,
      "learning_rate": 4.698482632541134e-05,
      "loss": 1.0587,
      "step": 33000
    },
    {
      "epoch": 60.33,
      "eval_loss": 3.483607053756714,
      "eval_runtime": 9.935,
      "eval_samples_per_second": 36.739,
      "eval_steps_per_second": 6.14,
      "step": 33000
    },
    {
      "epoch": 62.16,
      "learning_rate": 4.6893510054844606e-05,
      "loss": 1.0295,
      "step": 34000
    },
    {
      "epoch": 62.16,
      "eval_loss": 3.543713092803955,
      "eval_runtime": 9.955,
      "eval_samples_per_second": 36.665,
      "eval_steps_per_second": 6.128,
      "step": 34000
    },
    {
      "epoch": 63.99,
      "learning_rate": 4.680210237659964e-05,
      "loss": 0.9963,
      "step": 35000
    },
    {
      "epoch": 63.99,
      "eval_loss": 3.5333142280578613,
      "eval_runtime": 10.0438,
      "eval_samples_per_second": 36.341,
      "eval_steps_per_second": 6.073,
      "step": 35000
    },
    {
      "epoch": 65.81,
      "learning_rate": 4.6710786106032914e-05,
      "loss": 0.9617,
      "step": 36000
    },
    {
      "epoch": 65.81,
      "eval_loss": 3.5852224826812744,
      "eval_runtime": 10.0713,
      "eval_samples_per_second": 36.241,
      "eval_steps_per_second": 6.057,
      "step": 36000
    },
    {
      "epoch": 67.64,
      "learning_rate": 4.6619378427787935e-05,
      "loss": 0.9278,
      "step": 37000
    },
    {
      "epoch": 67.64,
      "eval_loss": 3.6299784183502197,
      "eval_runtime": 9.993,
      "eval_samples_per_second": 36.526,
      "eval_steps_per_second": 6.104,
      "step": 37000
    },
    {
      "epoch": 69.47,
      "learning_rate": 4.652797074954296e-05,
      "loss": 0.9027,
      "step": 38000
    },
    {
      "epoch": 69.47,
      "eval_loss": 3.644850969314575,
      "eval_runtime": 10.364,
      "eval_samples_per_second": 35.218,
      "eval_steps_per_second": 5.886,
      "step": 38000
    },
    {
      "epoch": 71.3,
      "learning_rate": 4.643656307129799e-05,
      "loss": 0.8732,
      "step": 39000
    },
    {
      "epoch": 71.3,
      "eval_loss": 3.703965187072754,
      "eval_runtime": 10.3219,
      "eval_samples_per_second": 35.362,
      "eval_steps_per_second": 5.91,
      "step": 39000
    },
    {
      "epoch": 73.13,
      "learning_rate": 4.634533820840951e-05,
      "loss": 0.8452,
      "step": 40000
    },
    {
      "epoch": 73.13,
      "eval_loss": 3.7363781929016113,
      "eval_runtime": 9.946,
      "eval_samples_per_second": 36.698,
      "eval_steps_per_second": 6.133,
      "step": 40000
    },
    {
      "epoch": 74.95,
      "learning_rate": 4.625393053016454e-05,
      "loss": 0.8197,
      "step": 41000
    },
    {
      "epoch": 74.95,
      "eval_loss": 3.753584861755371,
      "eval_runtime": 9.969,
      "eval_samples_per_second": 36.613,
      "eval_steps_per_second": 6.119,
      "step": 41000
    },
    {
      "epoch": 76.78,
      "learning_rate": 4.6162522851919566e-05,
      "loss": 0.7898,
      "step": 42000
    },
    {
      "epoch": 76.78,
      "eval_loss": 3.7998735904693604,
      "eval_runtime": 9.964,
      "eval_samples_per_second": 36.632,
      "eval_steps_per_second": 6.122,
      "step": 42000
    },
    {
      "epoch": 78.61,
      "learning_rate": 4.6071115173674586e-05,
      "loss": 0.7673,
      "step": 43000
    },
    {
      "epoch": 78.61,
      "eval_loss": 3.8310341835021973,
      "eval_runtime": 10.351,
      "eval_samples_per_second": 35.262,
      "eval_steps_per_second": 5.893,
      "step": 43000
    },
    {
      "epoch": 80.44,
      "learning_rate": 4.597970749542962e-05,
      "loss": 0.7389,
      "step": 44000
    },
    {
      "epoch": 80.44,
      "eval_loss": 3.882105588912964,
      "eval_runtime": 10.349,
      "eval_samples_per_second": 35.269,
      "eval_steps_per_second": 5.894,
      "step": 44000
    },
    {
      "epoch": 82.27,
      "learning_rate": 4.588829981718464e-05,
      "loss": 0.7192,
      "step": 45000
    },
    {
      "epoch": 82.27,
      "eval_loss": 3.898214101791382,
      "eval_runtime": 10.0882,
      "eval_samples_per_second": 36.181,
      "eval_steps_per_second": 6.047,
      "step": 45000
    },
    {
      "epoch": 84.1,
      "learning_rate": 4.5796983546617915e-05,
      "loss": 0.6965,
      "step": 46000
    },
    {
      "epoch": 84.1,
      "eval_loss": 3.9292538166046143,
      "eval_runtime": 9.98,
      "eval_samples_per_second": 36.573,
      "eval_steps_per_second": 6.112,
      "step": 46000
    },
    {
      "epoch": 85.92,
      "learning_rate": 4.570557586837295e-05,
      "loss": 0.674,
      "step": 47000
    },
    {
      "epoch": 85.92,
      "eval_loss": 3.9541404247283936,
      "eval_runtime": 9.947,
      "eval_samples_per_second": 36.694,
      "eval_steps_per_second": 6.133,
      "step": 47000
    },
    {
      "epoch": 87.75,
      "learning_rate": 4.561425959780622e-05,
      "loss": 0.6505,
      "step": 48000
    },
    {
      "epoch": 87.75,
      "eval_loss": 3.9937522411346436,
      "eval_runtime": 9.939,
      "eval_samples_per_second": 36.724,
      "eval_steps_per_second": 6.137,
      "step": 48000
    },
    {
      "epoch": 89.58,
      "learning_rate": 4.5522851919561245e-05,
      "loss": 0.6285,
      "step": 49000
    },
    {
      "epoch": 89.58,
      "eval_loss": 4.022762775421143,
      "eval_runtime": 9.95,
      "eval_samples_per_second": 36.683,
      "eval_steps_per_second": 6.131,
      "step": 49000
    },
    {
      "epoch": 91.41,
      "learning_rate": 4.543153564899452e-05,
      "loss": 0.6121,
      "step": 50000
    },
    {
      "epoch": 91.41,
      "eval_loss": 4.071320533752441,
      "eval_runtime": 9.912,
      "eval_samples_per_second": 36.824,
      "eval_steps_per_second": 6.154,
      "step": 50000
    },
    {
      "epoch": 93.24,
      "learning_rate": 4.5340127970749546e-05,
      "loss": 0.5924,
      "step": 51000
    },
    {
      "epoch": 93.24,
      "eval_loss": 4.105781555175781,
      "eval_runtime": 9.939,
      "eval_samples_per_second": 36.724,
      "eval_steps_per_second": 6.137,
      "step": 51000
    },
    {
      "epoch": 95.06,
      "learning_rate": 4.524881170018282e-05,
      "loss": 0.5755,
      "step": 52000
    },
    {
      "epoch": 95.06,
      "eval_loss": 4.130266189575195,
      "eval_runtime": 9.931,
      "eval_samples_per_second": 36.754,
      "eval_steps_per_second": 6.142,
      "step": 52000
    },
    {
      "epoch": 96.89,
      "learning_rate": 4.515740402193784e-05,
      "loss": 0.5539,
      "step": 53000
    },
    {
      "epoch": 96.89,
      "eval_loss": 4.161491394042969,
      "eval_runtime": 9.934,
      "eval_samples_per_second": 36.742,
      "eval_steps_per_second": 6.141,
      "step": 53000
    },
    {
      "epoch": 98.72,
      "learning_rate": 4.5065996343692876e-05,
      "loss": 0.5364,
      "step": 54000
    },
    {
      "epoch": 98.72,
      "eval_loss": 4.184945106506348,
      "eval_runtime": 9.926,
      "eval_samples_per_second": 36.772,
      "eval_steps_per_second": 6.145,
      "step": 54000
    },
    {
      "epoch": 100.55,
      "learning_rate": 4.4974680073126143e-05,
      "loss": 0.5185,
      "step": 55000
    },
    {
      "epoch": 100.55,
      "eval_loss": 4.228085041046143,
      "eval_runtime": 9.935,
      "eval_samples_per_second": 36.739,
      "eval_steps_per_second": 6.14,
      "step": 55000
    },
    {
      "epoch": 102.38,
      "learning_rate": 4.488327239488117e-05,
      "loss": 0.5053,
      "step": 56000
    },
    {
      "epoch": 102.38,
      "eval_loss": 4.275339126586914,
      "eval_runtime": 9.922,
      "eval_samples_per_second": 36.787,
      "eval_steps_per_second": 6.148,
      "step": 56000
    },
    {
      "epoch": 104.2,
      "learning_rate": 4.47918647166362e-05,
      "loss": 0.489,
      "step": 57000
    },
    {
      "epoch": 104.2,
      "eval_loss": 4.300563812255859,
      "eval_runtime": 9.951,
      "eval_samples_per_second": 36.68,
      "eval_steps_per_second": 6.13,
      "step": 57000
    },
    {
      "epoch": 106.03,
      "learning_rate": 4.4700457038391225e-05,
      "loss": 0.4711,
      "step": 58000
    },
    {
      "epoch": 106.03,
      "eval_loss": 4.336776256561279,
      "eval_runtime": 10.0173,
      "eval_samples_per_second": 36.437,
      "eval_steps_per_second": 6.089,
      "step": 58000
    },
    {
      "epoch": 107.86,
      "learning_rate": 4.460932358318099e-05,
      "loss": 0.4592,
      "step": 59000
    },
    {
      "epoch": 107.86,
      "eval_loss": 4.356552600860596,
      "eval_runtime": 9.981,
      "eval_samples_per_second": 36.569,
      "eval_steps_per_second": 6.112,
      "step": 59000
    },
    {
      "epoch": 109.69,
      "learning_rate": 4.451791590493602e-05,
      "loss": 0.4454,
      "step": 60000
    },
    {
      "epoch": 109.69,
      "eval_loss": 4.395864486694336,
      "eval_runtime": 10.013,
      "eval_samples_per_second": 36.453,
      "eval_steps_per_second": 6.092,
      "step": 60000
    },
    {
      "epoch": 111.52,
      "learning_rate": 4.442650822669104e-05,
      "loss": 0.4322,
      "step": 61000
    },
    {
      "epoch": 111.52,
      "eval_loss": 4.422004222869873,
      "eval_runtime": 9.976,
      "eval_samples_per_second": 36.588,
      "eval_steps_per_second": 6.115,
      "step": 61000
    },
    {
      "epoch": 113.35,
      "learning_rate": 4.4335100548446076e-05,
      "loss": 0.4188,
      "step": 62000
    },
    {
      "epoch": 113.35,
      "eval_loss": 4.454238414764404,
      "eval_runtime": 9.997,
      "eval_samples_per_second": 36.511,
      "eval_steps_per_second": 6.102,
      "step": 62000
    },
    {
      "epoch": 115.17,
      "learning_rate": 4.4243784277879344e-05,
      "loss": 0.4083,
      "step": 63000
    },
    {
      "epoch": 115.17,
      "eval_loss": 4.485558032989502,
      "eval_runtime": 9.98,
      "eval_samples_per_second": 36.573,
      "eval_steps_per_second": 6.112,
      "step": 63000
    },
    {
      "epoch": 117.0,
      "learning_rate": 4.415237659963437e-05,
      "loss": 0.3959,
      "step": 64000
    },
    {
      "epoch": 117.0,
      "eval_loss": 4.506533145904541,
      "eval_runtime": 9.988,
      "eval_samples_per_second": 36.544,
      "eval_steps_per_second": 6.107,
      "step": 64000
    },
    {
      "epoch": 118.83,
      "learning_rate": 4.40609689213894e-05,
      "loss": 0.3821,
      "step": 65000
    },
    {
      "epoch": 118.83,
      "eval_loss": 4.529088497161865,
      "eval_runtime": 9.974,
      "eval_samples_per_second": 36.595,
      "eval_steps_per_second": 6.116,
      "step": 65000
    },
    {
      "epoch": 120.66,
      "learning_rate": 4.3969561243144426e-05,
      "loss": 0.3718,
      "step": 66000
    },
    {
      "epoch": 120.66,
      "eval_loss": 4.579093933105469,
      "eval_runtime": 9.989,
      "eval_samples_per_second": 36.54,
      "eval_steps_per_second": 6.107,
      "step": 66000
    },
    {
      "epoch": 122.49,
      "learning_rate": 4.38782449725777e-05,
      "loss": 0.3617,
      "step": 67000
    },
    {
      "epoch": 122.49,
      "eval_loss": 4.601367473602295,
      "eval_runtime": 10.008,
      "eval_samples_per_second": 36.471,
      "eval_steps_per_second": 6.095,
      "step": 67000
    },
    {
      "epoch": 124.31,
      "learning_rate": 4.378692870201097e-05,
      "loss": 0.3505,
      "step": 68000
    },
    {
      "epoch": 124.31,
      "eval_loss": 4.62247896194458,
      "eval_runtime": 9.992,
      "eval_samples_per_second": 36.529,
      "eval_steps_per_second": 6.105,
      "step": 68000
    },
    {
      "epoch": 126.14,
      "learning_rate": 4.369561243144424e-05,
      "loss": 0.3426,
      "step": 69000
    },
    {
      "epoch": 126.14,
      "eval_loss": 4.659485816955566,
      "eval_runtime": 9.985,
      "eval_samples_per_second": 36.555,
      "eval_steps_per_second": 6.109,
      "step": 69000
    }
  ],
  "max_steps": 547000,
  "num_train_epochs": 1000,
  "total_flos": 1.07877015062784e+17,
  "trial_name": null,
  "trial_params": null
}
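
The log_history above interleaves a training-loss record and an evaluation record at each logged step; the training loss falls steadily while eval_loss bottoms out near step 6000 and then rises, the usual overfitting signature. A minimal sketch of reading both curves back out of this state file, assuming it is saved as trainer_state.json next to the script (the path and output format are illustrative, not part of the original file):

import json

# Load the trainer state shown above (file name is an assumption for this sketch).
with open("trainer_state.json") as f:
    state = json.load(f)

train_points = []  # (step, training loss)
eval_points = []   # (step, evaluation loss)
for record in state["log_history"]:
    if "loss" in record:
        train_points.append((record["step"], record["loss"]))
    if "eval_loss" in record:
        eval_points.append((record["step"], record["eval_loss"]))

# Report the lowest evaluation loss and where it occurred, then both curves side by side.
best_step, best_eval = min(eval_points, key=lambda p: p[1])
print(f"best eval_loss {best_eval:.4f} at step {best_step}")
for (step, train_loss), (_, eval_loss) in zip(train_points, eval_points):
    print(f"step {step:>6}  train {train_loss:.4f}  eval {eval_loss:.4f}")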