{ "best_metric": 0.6713286713286714, "best_model_checkpoint": "wav2vec2-5Class-train-test-finetune-V1/checkpoint-858", "epoch": 323.0769230769231, "eval_steps": 500, "global_step": 1050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.92, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6025770902633667, "eval_runtime": 4.6997, "eval_samples_per_second": 60.854, "eval_steps_per_second": 0.638, "step": 3 }, { "epoch": 1.85, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6024432182312012, "eval_runtime": 4.9888, "eval_samples_per_second": 57.329, "eval_steps_per_second": 0.601, "step": 6 }, { "epoch": 2.77, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6022151708602905, "eval_runtime": 4.302, "eval_samples_per_second": 66.481, "eval_steps_per_second": 0.697, "step": 9 }, { "epoch": 4.0, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6017613410949707, "eval_runtime": 4.5772, "eval_samples_per_second": 62.484, "eval_steps_per_second": 0.655, "step": 13 }, { "epoch": 4.92, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6013050079345703, "eval_runtime": 4.3565, "eval_samples_per_second": 65.649, "eval_steps_per_second": 0.689, "step": 16 }, { "epoch": 5.85, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6007716655731201, "eval_runtime": 4.1903, "eval_samples_per_second": 68.253, "eval_steps_per_second": 0.716, "step": 19 }, { "epoch": 6.77, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.6001368761062622, "eval_runtime": 4.7263, "eval_samples_per_second": 60.513, "eval_steps_per_second": 0.635, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.16083916083916083, "eval_loss": 1.599120020866394, "eval_runtime": 4.511, "eval_samples_per_second": 63.4, "eval_steps_per_second": 0.665, "step": 26 }, { "epoch": 8.92, "eval_accuracy": 0.16433566433566432, "eval_loss": 1.5982468128204346, "eval_runtime": 4.2484, "eval_samples_per_second": 67.319, "eval_steps_per_second": 0.706, "step": 29 }, { "epoch": 9.85, "eval_accuracy": 0.1853146853146853, "eval_loss": 1.597304105758667, "eval_runtime": 3.8273, "eval_samples_per_second": 74.726, "eval_steps_per_second": 0.784, "step": 32 }, { "epoch": 10.77, "eval_accuracy": 0.1888111888111888, "eval_loss": 1.5962693691253662, "eval_runtime": 4.8122, "eval_samples_per_second": 59.432, "eval_steps_per_second": 0.623, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.21678321678321677, "eval_loss": 1.5947297811508179, "eval_runtime": 4.7135, "eval_samples_per_second": 60.677, "eval_steps_per_second": 0.636, "step": 39 }, { "epoch": 12.92, "eval_accuracy": 0.23076923076923078, "eval_loss": 1.59347403049469, "eval_runtime": 4.362, "eval_samples_per_second": 65.567, "eval_steps_per_second": 0.688, "step": 42 }, { "epoch": 13.85, "eval_accuracy": 0.23076923076923078, "eval_loss": 1.592130184173584, "eval_runtime": 3.6426, "eval_samples_per_second": 78.514, "eval_steps_per_second": 0.824, "step": 45 }, { "epoch": 14.77, "eval_accuracy": 0.24825174825174826, "eval_loss": 1.5907161235809326, "eval_runtime": 3.8908, "eval_samples_per_second": 73.506, "eval_steps_per_second": 0.771, "step": 48 }, { "epoch": 15.38, "grad_norm": 0.9358195066452026, "learning_rate": 1.4285714285714285e-05, "loss": 1.5896, "step": 50 }, { "epoch": 16.0, "eval_accuracy": 0.27972027972027974, "eval_loss": 1.588749647140503, "eval_runtime": 4.5869, "eval_samples_per_second": 62.352, "eval_steps_per_second": 0.654, "step": 52 }, { "epoch": 16.92, "eval_accuracy": 0.2937062937062937, "eval_loss": 1.5871775150299072, "eval_runtime": 3.7115, "eval_samples_per_second": 77.057, "eval_steps_per_second": 0.808, "step": 55 }, { "epoch": 17.85, "eval_accuracy": 0.3041958041958042, "eval_loss": 1.5855580568313599, "eval_runtime": 4.1287, "eval_samples_per_second": 69.271, "eval_steps_per_second": 0.727, "step": 58 }, { "epoch": 18.77, "eval_accuracy": 0.3356643356643357, "eval_loss": 1.5838947296142578, "eval_runtime": 4.2428, "eval_samples_per_second": 67.408, "eval_steps_per_second": 0.707, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.3706293706293706, "eval_loss": 1.5815041065216064, "eval_runtime": 4.1047, "eval_samples_per_second": 69.677, "eval_steps_per_second": 0.731, "step": 65 }, { "epoch": 20.92, "eval_accuracy": 0.3811188811188811, "eval_loss": 1.5795336961746216, "eval_runtime": 4.3419, "eval_samples_per_second": 65.869, "eval_steps_per_second": 0.691, "step": 68 }, { "epoch": 21.85, "eval_accuracy": 0.3776223776223776, "eval_loss": 1.5774352550506592, "eval_runtime": 5.0868, "eval_samples_per_second": 56.224, "eval_steps_per_second": 0.59, "step": 71 }, { "epoch": 22.77, "eval_accuracy": 0.36013986013986016, "eval_loss": 1.5753329992294312, "eval_runtime": 4.2391, "eval_samples_per_second": 67.468, "eval_steps_per_second": 0.708, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.3531468531468531, "eval_loss": 1.5723278522491455, "eval_runtime": 4.1913, "eval_samples_per_second": 68.237, "eval_steps_per_second": 0.716, "step": 78 }, { "epoch": 24.92, "eval_accuracy": 0.33916083916083917, "eval_loss": 1.5699166059494019, "eval_runtime": 4.1956, "eval_samples_per_second": 68.166, "eval_steps_per_second": 0.715, "step": 81 }, { "epoch": 25.85, "eval_accuracy": 0.32867132867132864, "eval_loss": 1.5674887895584106, "eval_runtime": 4.2222, "eval_samples_per_second": 67.737, "eval_steps_per_second": 0.711, "step": 84 }, { "epoch": 26.77, "eval_accuracy": 0.32167832167832167, "eval_loss": 1.564941167831421, "eval_runtime": 4.3597, "eval_samples_per_second": 65.6, "eval_steps_per_second": 0.688, "step": 87 }, { "epoch": 28.0, "eval_accuracy": 0.3146853146853147, "eval_loss": 1.5612314939498901, "eval_runtime": 4.4489, "eval_samples_per_second": 64.285, "eval_steps_per_second": 0.674, "step": 91 }, { "epoch": 28.92, "eval_accuracy": 0.3111888111888112, "eval_loss": 1.5583022832870483, "eval_runtime": 3.9002, "eval_samples_per_second": 73.33, "eval_steps_per_second": 0.769, "step": 94 }, { "epoch": 29.85, "eval_accuracy": 0.3076923076923077, "eval_loss": 1.5552746057510376, "eval_runtime": 3.8211, "eval_samples_per_second": 74.848, "eval_steps_per_second": 0.785, "step": 97 }, { "epoch": 30.77, "grad_norm": 1.0354599952697754, "learning_rate": 2.857142857142857e-05, "loss": 1.5478, "step": 100 }, { "epoch": 30.77, "eval_accuracy": 0.3111888111888112, "eval_loss": 1.5521942377090454, "eval_runtime": 3.9174, "eval_samples_per_second": 73.008, "eval_steps_per_second": 0.766, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.3006993006993007, "eval_loss": 1.5478450059890747, "eval_runtime": 4.2795, "eval_samples_per_second": 66.83, "eval_steps_per_second": 0.701, "step": 104 }, { "epoch": 32.92, "eval_accuracy": 0.2937062937062937, "eval_loss": 1.5445122718811035, "eval_runtime": 4.2852, "eval_samples_per_second": 66.742, "eval_steps_per_second": 0.7, "step": 107 }, { "epoch": 33.85, "eval_accuracy": 0.2867132867132867, "eval_loss": 1.5413419008255005, "eval_runtime": 4.7279, "eval_samples_per_second": 60.492, "eval_steps_per_second": 0.635, "step": 110 }, { "epoch": 34.77, "eval_accuracy": 0.2762237762237762, "eval_loss": 1.5382936000823975, "eval_runtime": 4.5512, "eval_samples_per_second": 62.841, "eval_steps_per_second": 0.659, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.2762237762237762, "eval_loss": 1.5340471267700195, "eval_runtime": 4.0909, "eval_samples_per_second": 69.911, "eval_steps_per_second": 0.733, "step": 117 }, { "epoch": 36.92, "eval_accuracy": 0.26573426573426573, "eval_loss": 1.5310546159744263, "eval_runtime": 3.6024, "eval_samples_per_second": 79.392, "eval_steps_per_second": 0.833, "step": 120 }, { "epoch": 37.85, "eval_accuracy": 0.2517482517482518, "eval_loss": 1.5282360315322876, "eval_runtime": 3.8247, "eval_samples_per_second": 74.777, "eval_steps_per_second": 0.784, "step": 123 }, { "epoch": 38.77, "eval_accuracy": 0.24475524475524477, "eval_loss": 1.5255342721939087, "eval_runtime": 4.5969, "eval_samples_per_second": 62.216, "eval_steps_per_second": 0.653, "step": 126 }, { "epoch": 40.0, "eval_accuracy": 0.24125874125874125, "eval_loss": 1.5223983526229858, "eval_runtime": 4.5515, "eval_samples_per_second": 62.836, "eval_steps_per_second": 0.659, "step": 130 }, { "epoch": 40.92, "eval_accuracy": 0.23426573426573427, "eval_loss": 1.5204370021820068, "eval_runtime": 3.5155, "eval_samples_per_second": 81.354, "eval_steps_per_second": 0.853, "step": 133 }, { "epoch": 41.85, "eval_accuracy": 0.24475524475524477, "eval_loss": 1.5190935134887695, "eval_runtime": 4.1808, "eval_samples_per_second": 68.409, "eval_steps_per_second": 0.718, "step": 136 }, { "epoch": 42.77, "eval_accuracy": 0.23776223776223776, "eval_loss": 1.5183987617492676, "eval_runtime": 5.6726, "eval_samples_per_second": 50.418, "eval_steps_per_second": 0.529, "step": 139 }, { "epoch": 44.0, "eval_accuracy": 0.23076923076923078, "eval_loss": 1.518118143081665, "eval_runtime": 4.9675, "eval_samples_per_second": 57.575, "eval_steps_per_second": 0.604, "step": 143 }, { "epoch": 44.92, "eval_accuracy": 0.23076923076923078, "eval_loss": 1.5188548564910889, "eval_runtime": 3.8693, "eval_samples_per_second": 73.915, "eval_steps_per_second": 0.775, "step": 146 }, { "epoch": 45.85, "eval_accuracy": 0.23776223776223776, "eval_loss": 1.5198808908462524, "eval_runtime": 4.5179, "eval_samples_per_second": 63.304, "eval_steps_per_second": 0.664, "step": 149 }, { "epoch": 46.15, "grad_norm": 0.8092480897903442, "learning_rate": 2.857142857142857e-05, "loss": 1.4365, "step": 150 }, { "epoch": 46.77, "eval_accuracy": 0.24825174825174826, "eval_loss": 1.5214567184448242, "eval_runtime": 4.1214, "eval_samples_per_second": 69.394, "eval_steps_per_second": 0.728, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.25874125874125875, "eval_loss": 1.5235881805419922, "eval_runtime": 4.2822, "eval_samples_per_second": 66.789, "eval_steps_per_second": 0.701, "step": 156 }, { "epoch": 48.92, "eval_accuracy": 0.26573426573426573, "eval_loss": 1.5250674486160278, "eval_runtime": 6.0487, "eval_samples_per_second": 47.283, "eval_steps_per_second": 0.496, "step": 159 }, { "epoch": 49.85, "eval_accuracy": 0.28321678321678323, "eval_loss": 1.5259337425231934, "eval_runtime": 4.1709, "eval_samples_per_second": 68.571, "eval_steps_per_second": 0.719, "step": 162 }, { "epoch": 50.77, "eval_accuracy": 0.27972027972027974, "eval_loss": 1.5261683464050293, "eval_runtime": 4.3522, "eval_samples_per_second": 65.713, "eval_steps_per_second": 0.689, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.2937062937062937, "eval_loss": 1.5248568058013916, "eval_runtime": 4.8279, "eval_samples_per_second": 59.24, "eval_steps_per_second": 0.621, "step": 169 }, { "epoch": 52.92, "eval_accuracy": 0.3006993006993007, "eval_loss": 1.522719383239746, "eval_runtime": 4.205, "eval_samples_per_second": 68.014, "eval_steps_per_second": 0.713, "step": 172 }, { "epoch": 53.85, "eval_accuracy": 0.3076923076923077, "eval_loss": 1.5189520120620728, "eval_runtime": 5.4852, "eval_samples_per_second": 52.14, "eval_steps_per_second": 0.547, "step": 175 }, { "epoch": 54.77, "eval_accuracy": 0.32167832167832167, "eval_loss": 1.5137863159179688, "eval_runtime": 4.3909, "eval_samples_per_second": 65.134, "eval_steps_per_second": 0.683, "step": 178 }, { "epoch": 56.0, "eval_accuracy": 0.34965034965034963, "eval_loss": 1.5053085088729858, "eval_runtime": 4.0994, "eval_samples_per_second": 69.766, "eval_steps_per_second": 0.732, "step": 182 }, { "epoch": 56.92, "eval_accuracy": 0.36013986013986016, "eval_loss": 1.497667908668518, "eval_runtime": 4.4275, "eval_samples_per_second": 64.596, "eval_steps_per_second": 0.678, "step": 185 }, { "epoch": 57.85, "eval_accuracy": 0.36013986013986016, "eval_loss": 1.4910008907318115, "eval_runtime": 4.4713, "eval_samples_per_second": 63.963, "eval_steps_per_second": 0.671, "step": 188 }, { "epoch": 58.77, "eval_accuracy": 0.36713286713286714, "eval_loss": 1.4839717149734497, "eval_runtime": 4.6741, "eval_samples_per_second": 61.189, "eval_steps_per_second": 0.642, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.3706293706293706, "eval_loss": 1.4754726886749268, "eval_runtime": 4.1736, "eval_samples_per_second": 68.526, "eval_steps_per_second": 0.719, "step": 195 }, { "epoch": 60.92, "eval_accuracy": 0.3811188811188811, "eval_loss": 1.4683842658996582, "eval_runtime": 3.9445, "eval_samples_per_second": 72.507, "eval_steps_per_second": 0.761, "step": 198 }, { "epoch": 61.54, "grad_norm": 0.41712912917137146, "learning_rate": 2.6984126984126984e-05, "loss": 1.2845, "step": 200 }, { "epoch": 61.85, "eval_accuracy": 0.38461538461538464, "eval_loss": 1.4626859426498413, "eval_runtime": 4.7292, "eval_samples_per_second": 60.475, "eval_steps_per_second": 0.634, "step": 201 }, { "epoch": 62.77, "eval_accuracy": 0.3881118881118881, "eval_loss": 1.4546846151351929, "eval_runtime": 4.5742, "eval_samples_per_second": 62.524, "eval_steps_per_second": 0.656, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.4020979020979021, "eval_loss": 1.4456309080123901, "eval_runtime": 4.1295, "eval_samples_per_second": 69.259, "eval_steps_per_second": 0.726, "step": 208 }, { "epoch": 64.92, "eval_accuracy": 0.40559440559440557, "eval_loss": 1.4385287761688232, "eval_runtime": 4.9759, "eval_samples_per_second": 57.477, "eval_steps_per_second": 0.603, "step": 211 }, { "epoch": 65.85, "eval_accuracy": 0.4090909090909091, "eval_loss": 1.4315868616104126, "eval_runtime": 4.7, "eval_samples_per_second": 60.851, "eval_steps_per_second": 0.638, "step": 214 }, { "epoch": 66.77, "eval_accuracy": 0.4160839160839161, "eval_loss": 1.423151969909668, "eval_runtime": 4.1887, "eval_samples_per_second": 68.28, "eval_steps_per_second": 0.716, "step": 217 }, { "epoch": 68.0, "eval_accuracy": 0.42657342657342656, "eval_loss": 1.4132899045944214, "eval_runtime": 6.2189, "eval_samples_per_second": 45.989, "eval_steps_per_second": 0.482, "step": 221 }, { "epoch": 68.92, "eval_accuracy": 0.43006993006993005, "eval_loss": 1.4061568975448608, "eval_runtime": 5.3452, "eval_samples_per_second": 53.506, "eval_steps_per_second": 0.561, "step": 224 }, { "epoch": 69.85, "eval_accuracy": 0.43356643356643354, "eval_loss": 1.4003138542175293, "eval_runtime": 5.111, "eval_samples_per_second": 55.958, "eval_steps_per_second": 0.587, "step": 227 }, { "epoch": 70.77, "eval_accuracy": 0.43356643356643354, "eval_loss": 1.396316647529602, "eval_runtime": 4.6366, "eval_samples_per_second": 61.683, "eval_steps_per_second": 0.647, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.43356643356643354, "eval_loss": 1.3880172967910767, "eval_runtime": 4.6851, "eval_samples_per_second": 61.045, "eval_steps_per_second": 0.64, "step": 234 }, { "epoch": 72.92, "eval_accuracy": 0.4370629370629371, "eval_loss": 1.3800761699676514, "eval_runtime": 4.5739, "eval_samples_per_second": 62.528, "eval_steps_per_second": 0.656, "step": 237 }, { "epoch": 73.85, "eval_accuracy": 0.4405594405594406, "eval_loss": 1.3724888563156128, "eval_runtime": 4.9175, "eval_samples_per_second": 58.16, "eval_steps_per_second": 0.61, "step": 240 }, { "epoch": 74.77, "eval_accuracy": 0.44755244755244755, "eval_loss": 1.3655133247375488, "eval_runtime": 5.0566, "eval_samples_per_second": 56.559, "eval_steps_per_second": 0.593, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.45104895104895104, "eval_loss": 1.3560560941696167, "eval_runtime": 3.592, "eval_samples_per_second": 79.622, "eval_steps_per_second": 0.835, "step": 247 }, { "epoch": 76.92, "grad_norm": 0.3923046588897705, "learning_rate": 2.5396825396825397e-05, "loss": 1.1752, "step": 250 }, { "epoch": 76.92, "eval_accuracy": 0.45454545454545453, "eval_loss": 1.3477288484573364, "eval_runtime": 5.2706, "eval_samples_per_second": 54.264, "eval_steps_per_second": 0.569, "step": 250 }, { "epoch": 77.85, "eval_accuracy": 0.45454545454545453, "eval_loss": 1.3385549783706665, "eval_runtime": 4.3668, "eval_samples_per_second": 65.494, "eval_steps_per_second": 0.687, "step": 253 }, { "epoch": 78.77, "eval_accuracy": 0.45454545454545453, "eval_loss": 1.329953908920288, "eval_runtime": 4.3715, "eval_samples_per_second": 65.423, "eval_steps_per_second": 0.686, "step": 256 }, { "epoch": 80.0, "eval_accuracy": 0.46153846153846156, "eval_loss": 1.3186790943145752, "eval_runtime": 4.9164, "eval_samples_per_second": 58.173, "eval_steps_per_second": 0.61, "step": 260 }, { "epoch": 80.92, "eval_accuracy": 0.47202797202797203, "eval_loss": 1.3101806640625, "eval_runtime": 4.5555, "eval_samples_per_second": 62.782, "eval_steps_per_second": 0.659, "step": 263 }, { "epoch": 81.85, "eval_accuracy": 0.4755244755244755, "eval_loss": 1.3015599250793457, "eval_runtime": 3.7938, "eval_samples_per_second": 75.387, "eval_steps_per_second": 0.791, "step": 266 }, { "epoch": 82.77, "eval_accuracy": 0.4825174825174825, "eval_loss": 1.2916128635406494, "eval_runtime": 3.4076, "eval_samples_per_second": 83.931, "eval_steps_per_second": 0.88, "step": 269 }, { "epoch": 84.0, "eval_accuracy": 0.4825174825174825, "eval_loss": 1.2802271842956543, "eval_runtime": 3.947, "eval_samples_per_second": 72.46, "eval_steps_per_second": 0.76, "step": 273 }, { "epoch": 84.92, "eval_accuracy": 0.486013986013986, "eval_loss": 1.2717865705490112, "eval_runtime": 4.6584, "eval_samples_per_second": 61.394, "eval_steps_per_second": 0.644, "step": 276 }, { "epoch": 85.85, "eval_accuracy": 0.48951048951048953, "eval_loss": 1.2625547647476196, "eval_runtime": 3.7402, "eval_samples_per_second": 76.466, "eval_steps_per_second": 0.802, "step": 279 }, { "epoch": 86.77, "eval_accuracy": 0.493006993006993, "eval_loss": 1.2543796300888062, "eval_runtime": 4.726, "eval_samples_per_second": 60.516, "eval_steps_per_second": 0.635, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.493006993006993, "eval_loss": 1.2428691387176514, "eval_runtime": 3.9492, "eval_samples_per_second": 72.42, "eval_steps_per_second": 0.76, "step": 286 }, { "epoch": 88.92, "eval_accuracy": 0.4965034965034965, "eval_loss": 1.2338002920150757, "eval_runtime": 4.6146, "eval_samples_per_second": 61.978, "eval_steps_per_second": 0.65, "step": 289 }, { "epoch": 89.85, "eval_accuracy": 0.5034965034965035, "eval_loss": 1.223657488822937, "eval_runtime": 4.7664, "eval_samples_per_second": 60.003, "eval_steps_per_second": 0.629, "step": 292 }, { "epoch": 90.77, "eval_accuracy": 0.513986013986014, "eval_loss": 1.2134180068969727, "eval_runtime": 3.6509, "eval_samples_per_second": 78.337, "eval_steps_per_second": 0.822, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.5314685314685315, "eval_loss": 1.1996746063232422, "eval_runtime": 4.1379, "eval_samples_per_second": 69.118, "eval_steps_per_second": 0.725, "step": 299 }, { "epoch": 92.31, "grad_norm": 0.4333157241344452, "learning_rate": 2.380952380952381e-05, "loss": 1.0336, "step": 300 }, { "epoch": 92.92, "eval_accuracy": 0.534965034965035, "eval_loss": 1.1893947124481201, "eval_runtime": 4.3495, "eval_samples_per_second": 65.755, "eval_steps_per_second": 0.69, "step": 302 }, { "epoch": 93.85, "eval_accuracy": 0.5524475524475524, "eval_loss": 1.1794614791870117, "eval_runtime": 5.0315, "eval_samples_per_second": 56.842, "eval_steps_per_second": 0.596, "step": 305 }, { "epoch": 94.77, "eval_accuracy": 0.5629370629370629, "eval_loss": 1.1703894138336182, "eval_runtime": 4.5156, "eval_samples_per_second": 63.336, "eval_steps_per_second": 0.664, "step": 308 }, { "epoch": 96.0, "eval_accuracy": 0.5629370629370629, "eval_loss": 1.1574207544326782, "eval_runtime": 4.2769, "eval_samples_per_second": 66.871, "eval_steps_per_second": 0.701, "step": 312 }, { "epoch": 96.92, "eval_accuracy": 0.5804195804195804, "eval_loss": 1.1478246450424194, "eval_runtime": 4.4503, "eval_samples_per_second": 64.266, "eval_steps_per_second": 0.674, "step": 315 }, { "epoch": 97.85, "eval_accuracy": 0.583916083916084, "eval_loss": 1.1388368606567383, "eval_runtime": 5.2516, "eval_samples_per_second": 54.46, "eval_steps_per_second": 0.571, "step": 318 }, { "epoch": 98.77, "eval_accuracy": 0.5874125874125874, "eval_loss": 1.129955530166626, "eval_runtime": 4.5307, "eval_samples_per_second": 63.125, "eval_steps_per_second": 0.662, "step": 321 }, { "epoch": 100.0, "eval_accuracy": 0.5944055944055944, "eval_loss": 1.1171796321868896, "eval_runtime": 4.5358, "eval_samples_per_second": 63.054, "eval_steps_per_second": 0.661, "step": 325 }, { "epoch": 100.92, "eval_accuracy": 0.5979020979020979, "eval_loss": 1.1089597940444946, "eval_runtime": 4.7931, "eval_samples_per_second": 59.669, "eval_steps_per_second": 0.626, "step": 328 }, { "epoch": 101.85, "eval_accuracy": 0.5944055944055944, "eval_loss": 1.1001044511795044, "eval_runtime": 4.4134, "eval_samples_per_second": 64.802, "eval_steps_per_second": 0.68, "step": 331 }, { "epoch": 102.77, "eval_accuracy": 0.6048951048951049, "eval_loss": 1.0910252332687378, "eval_runtime": 4.8251, "eval_samples_per_second": 59.273, "eval_steps_per_second": 0.622, "step": 334 }, { "epoch": 104.0, "eval_accuracy": 0.6013986013986014, "eval_loss": 1.0768800973892212, "eval_runtime": 5.528, "eval_samples_per_second": 51.736, "eval_steps_per_second": 0.543, "step": 338 }, { "epoch": 104.92, "eval_accuracy": 0.6048951048951049, "eval_loss": 1.0675218105316162, "eval_runtime": 3.5589, "eval_samples_per_second": 80.362, "eval_steps_per_second": 0.843, "step": 341 }, { "epoch": 105.85, "eval_accuracy": 0.6118881118881119, "eval_loss": 1.0602259635925293, "eval_runtime": 4.2603, "eval_samples_per_second": 67.132, "eval_steps_per_second": 0.704, "step": 344 }, { "epoch": 106.77, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.0537227392196655, "eval_runtime": 4.4326, "eval_samples_per_second": 64.521, "eval_steps_per_second": 0.677, "step": 347 }, { "epoch": 107.69, "grad_norm": 0.46974265575408936, "learning_rate": 2.222222222222222e-05, "loss": 0.8927, "step": 350 }, { "epoch": 108.0, "eval_accuracy": 0.6223776223776224, "eval_loss": 1.045584797859192, "eval_runtime": 3.8481, "eval_samples_per_second": 74.323, "eval_steps_per_second": 0.78, "step": 351 }, { "epoch": 108.92, "eval_accuracy": 0.6293706293706294, "eval_loss": 1.0394147634506226, "eval_runtime": 4.6438, "eval_samples_per_second": 61.588, "eval_steps_per_second": 0.646, "step": 354 }, { "epoch": 109.85, "eval_accuracy": 0.6258741258741258, "eval_loss": 1.0330853462219238, "eval_runtime": 4.1627, "eval_samples_per_second": 68.706, "eval_steps_per_second": 0.721, "step": 357 }, { "epoch": 110.77, "eval_accuracy": 0.6258741258741258, "eval_loss": 1.0267359018325806, "eval_runtime": 4.5108, "eval_samples_per_second": 63.403, "eval_steps_per_second": 0.665, "step": 360 }, { "epoch": 112.0, "eval_accuracy": 0.6328671328671329, "eval_loss": 1.0192536115646362, "eval_runtime": 4.9039, "eval_samples_per_second": 58.321, "eval_steps_per_second": 0.612, "step": 364 }, { "epoch": 112.92, "eval_accuracy": 0.6363636363636364, "eval_loss": 1.0148723125457764, "eval_runtime": 3.8444, "eval_samples_per_second": 74.393, "eval_steps_per_second": 0.78, "step": 367 }, { "epoch": 113.85, "eval_accuracy": 0.6363636363636364, "eval_loss": 1.0099798440933228, "eval_runtime": 5.0977, "eval_samples_per_second": 56.104, "eval_steps_per_second": 0.589, "step": 370 }, { "epoch": 114.77, "eval_accuracy": 0.6398601398601399, "eval_loss": 1.0047037601470947, "eval_runtime": 4.837, "eval_samples_per_second": 59.128, "eval_steps_per_second": 0.62, "step": 373 }, { "epoch": 116.0, "eval_accuracy": 0.6398601398601399, "eval_loss": 0.9991178512573242, "eval_runtime": 4.9051, "eval_samples_per_second": 58.307, "eval_steps_per_second": 0.612, "step": 377 }, { "epoch": 116.92, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9972890615463257, "eval_runtime": 4.3405, "eval_samples_per_second": 65.891, "eval_steps_per_second": 0.691, "step": 380 }, { "epoch": 117.85, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9936119914054871, "eval_runtime": 3.9596, "eval_samples_per_second": 72.229, "eval_steps_per_second": 0.758, "step": 383 }, { "epoch": 118.77, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9908724427223206, "eval_runtime": 4.961, "eval_samples_per_second": 57.65, "eval_steps_per_second": 0.605, "step": 386 }, { "epoch": 120.0, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9878212213516235, "eval_runtime": 4.8469, "eval_samples_per_second": 59.006, "eval_steps_per_second": 0.619, "step": 390 }, { "epoch": 120.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9840787053108215, "eval_runtime": 4.5014, "eval_samples_per_second": 63.536, "eval_steps_per_second": 0.666, "step": 393 }, { "epoch": 121.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9810080528259277, "eval_runtime": 3.7722, "eval_samples_per_second": 75.818, "eval_steps_per_second": 0.795, "step": 396 }, { "epoch": 122.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9769043922424316, "eval_runtime": 4.713, "eval_samples_per_second": 60.684, "eval_steps_per_second": 0.637, "step": 399 }, { "epoch": 123.08, "grad_norm": 0.48339083790779114, "learning_rate": 2.0634920634920633e-05, "loss": 0.7859, "step": 400 }, { "epoch": 124.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9750482439994812, "eval_runtime": 4.9657, "eval_samples_per_second": 57.595, "eval_steps_per_second": 0.604, "step": 403 }, { "epoch": 124.92, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9752389192581177, "eval_runtime": 4.6155, "eval_samples_per_second": 61.965, "eval_steps_per_second": 0.65, "step": 406 }, { "epoch": 125.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.975228488445282, "eval_runtime": 4.2181, "eval_samples_per_second": 67.803, "eval_steps_per_second": 0.711, "step": 409 }, { "epoch": 126.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.973861575126648, "eval_runtime": 5.1633, "eval_samples_per_second": 55.391, "eval_steps_per_second": 0.581, "step": 412 }, { "epoch": 128.0, "eval_accuracy": 0.6433566433566433, "eval_loss": 0.9697291254997253, "eval_runtime": 3.8938, "eval_samples_per_second": 73.451, "eval_steps_per_second": 0.77, "step": 416 }, { "epoch": 128.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9673418998718262, "eval_runtime": 4.9929, "eval_samples_per_second": 57.282, "eval_steps_per_second": 0.601, "step": 419 }, { "epoch": 129.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9654524922370911, "eval_runtime": 4.8097, "eval_samples_per_second": 59.463, "eval_steps_per_second": 0.624, "step": 422 }, { "epoch": 130.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9648922085762024, "eval_runtime": 4.6728, "eval_samples_per_second": 61.205, "eval_steps_per_second": 0.642, "step": 425 }, { "epoch": 132.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9624325633049011, "eval_runtime": 3.9139, "eval_samples_per_second": 73.073, "eval_steps_per_second": 0.766, "step": 429 }, { "epoch": 132.92, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.960906445980072, "eval_runtime": 5.0181, "eval_samples_per_second": 56.994, "eval_steps_per_second": 0.598, "step": 432 }, { "epoch": 133.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.958863377571106, "eval_runtime": 4.172, "eval_samples_per_second": 68.552, "eval_steps_per_second": 0.719, "step": 435 }, { "epoch": 134.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9576795697212219, "eval_runtime": 5.1205, "eval_samples_per_second": 55.854, "eval_steps_per_second": 0.586, "step": 438 }, { "epoch": 136.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9575643539428711, "eval_runtime": 3.9439, "eval_samples_per_second": 72.517, "eval_steps_per_second": 0.761, "step": 442 }, { "epoch": 136.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9587862491607666, "eval_runtime": 4.289, "eval_samples_per_second": 66.682, "eval_steps_per_second": 0.699, "step": 445 }, { "epoch": 137.85, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9579612016677856, "eval_runtime": 4.3725, "eval_samples_per_second": 65.409, "eval_steps_per_second": 0.686, "step": 448 }, { "epoch": 138.46, "grad_norm": 0.5380613803863525, "learning_rate": 1.9047619047619046e-05, "loss": 0.7428, "step": 450 }, { "epoch": 138.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9572532176971436, "eval_runtime": 4.4425, "eval_samples_per_second": 64.378, "eval_steps_per_second": 0.675, "step": 451 }, { "epoch": 140.0, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9572116732597351, "eval_runtime": 5.4248, "eval_samples_per_second": 52.721, "eval_steps_per_second": 0.553, "step": 455 }, { "epoch": 140.92, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9591361284255981, "eval_runtime": 4.4268, "eval_samples_per_second": 64.606, "eval_steps_per_second": 0.678, "step": 458 }, { "epoch": 141.85, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9620410203933716, "eval_runtime": 5.1983, "eval_samples_per_second": 55.018, "eval_steps_per_second": 0.577, "step": 461 }, { "epoch": 142.77, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9638076424598694, "eval_runtime": 4.2721, "eval_samples_per_second": 66.945, "eval_steps_per_second": 0.702, "step": 464 }, { "epoch": 144.0, "eval_accuracy": 0.6503496503496503, "eval_loss": 0.9598986506462097, "eval_runtime": 4.7252, "eval_samples_per_second": 60.527, "eval_steps_per_second": 0.635, "step": 468 }, { "epoch": 144.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9564277529716492, "eval_runtime": 4.3597, "eval_samples_per_second": 65.601, "eval_steps_per_second": 0.688, "step": 471 }, { "epoch": 145.85, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9549375176429749, "eval_runtime": 4.7228, "eval_samples_per_second": 60.558, "eval_steps_per_second": 0.635, "step": 474 }, { "epoch": 146.77, "eval_accuracy": 0.6468531468531469, "eval_loss": 0.9566131830215454, "eval_runtime": 4.5206, "eval_samples_per_second": 63.267, "eval_steps_per_second": 0.664, "step": 477 }, { "epoch": 148.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9600043892860413, "eval_runtime": 4.5711, "eval_samples_per_second": 62.567, "eval_steps_per_second": 0.656, "step": 481 }, { "epoch": 148.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9608636498451233, "eval_runtime": 4.4191, "eval_samples_per_second": 64.719, "eval_steps_per_second": 0.679, "step": 484 }, { "epoch": 149.85, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.958177387714386, "eval_runtime": 5.4181, "eval_samples_per_second": 52.786, "eval_steps_per_second": 0.554, "step": 487 }, { "epoch": 150.77, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9540855288505554, "eval_runtime": 4.3306, "eval_samples_per_second": 66.041, "eval_steps_per_second": 0.693, "step": 490 }, { "epoch": 152.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.955094575881958, "eval_runtime": 4.3124, "eval_samples_per_second": 66.32, "eval_steps_per_second": 0.696, "step": 494 }, { "epoch": 152.92, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9549675583839417, "eval_runtime": 5.2148, "eval_samples_per_second": 54.844, "eval_steps_per_second": 0.575, "step": 497 }, { "epoch": 153.85, "grad_norm": 0.44109636545181274, "learning_rate": 1.746031746031746e-05, "loss": 0.7119, "step": 500 }, { "epoch": 153.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9533360600471497, "eval_runtime": 4.9161, "eval_samples_per_second": 58.177, "eval_steps_per_second": 0.61, "step": 500 }, { "epoch": 154.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9527105689048767, "eval_runtime": 4.9344, "eval_samples_per_second": 57.96, "eval_steps_per_second": 0.608, "step": 503 }, { "epoch": 156.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9555291533470154, "eval_runtime": 4.0853, "eval_samples_per_second": 70.008, "eval_steps_per_second": 0.734, "step": 507 }, { "epoch": 156.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9557806849479675, "eval_runtime": 4.5929, "eval_samples_per_second": 62.27, "eval_steps_per_second": 0.653, "step": 510 }, { "epoch": 157.85, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9578056931495667, "eval_runtime": 4.7785, "eval_samples_per_second": 59.851, "eval_steps_per_second": 0.628, "step": 513 }, { "epoch": 158.77, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9589922428131104, "eval_runtime": 3.9453, "eval_samples_per_second": 72.491, "eval_steps_per_second": 0.76, "step": 516 }, { "epoch": 160.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9553244113922119, "eval_runtime": 4.0583, "eval_samples_per_second": 70.473, "eval_steps_per_second": 0.739, "step": 520 }, { "epoch": 160.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9509897232055664, "eval_runtime": 4.3977, "eval_samples_per_second": 65.034, "eval_steps_per_second": 0.682, "step": 523 }, { "epoch": 161.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9446966052055359, "eval_runtime": 4.6204, "eval_samples_per_second": 61.899, "eval_steps_per_second": 0.649, "step": 526 }, { "epoch": 162.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9404750466346741, "eval_runtime": 4.132, "eval_samples_per_second": 69.216, "eval_steps_per_second": 0.726, "step": 529 }, { "epoch": 164.0, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9428921341896057, "eval_runtime": 4.3506, "eval_samples_per_second": 65.738, "eval_steps_per_second": 0.69, "step": 533 }, { "epoch": 164.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9472917914390564, "eval_runtime": 4.0197, "eval_samples_per_second": 71.15, "eval_steps_per_second": 0.746, "step": 536 }, { "epoch": 165.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.952194333076477, "eval_runtime": 4.7031, "eval_samples_per_second": 60.811, "eval_steps_per_second": 0.638, "step": 539 }, { "epoch": 166.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9533052444458008, "eval_runtime": 4.3489, "eval_samples_per_second": 65.764, "eval_steps_per_second": 0.69, "step": 542 }, { "epoch": 168.0, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9497887492179871, "eval_runtime": 4.2573, "eval_samples_per_second": 67.178, "eval_steps_per_second": 0.705, "step": 546 }, { "epoch": 168.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9472330808639526, "eval_runtime": 4.4709, "eval_samples_per_second": 63.969, "eval_steps_per_second": 0.671, "step": 549 }, { "epoch": 169.23, "grad_norm": 0.8247159123420715, "learning_rate": 1.5873015873015872e-05, "loss": 0.6802, "step": 550 }, { "epoch": 169.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9484440684318542, "eval_runtime": 4.8437, "eval_samples_per_second": 59.046, "eval_steps_per_second": 0.619, "step": 552 }, { "epoch": 170.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.948759913444519, "eval_runtime": 4.0675, "eval_samples_per_second": 70.313, "eval_steps_per_second": 0.738, "step": 555 }, { "epoch": 172.0, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9508001208305359, "eval_runtime": 4.1371, "eval_samples_per_second": 69.13, "eval_steps_per_second": 0.725, "step": 559 }, { "epoch": 172.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9550370573997498, "eval_runtime": 5.1562, "eval_samples_per_second": 55.467, "eval_steps_per_second": 0.582, "step": 562 }, { "epoch": 173.85, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9578031301498413, "eval_runtime": 5.1826, "eval_samples_per_second": 55.185, "eval_steps_per_second": 0.579, "step": 565 }, { "epoch": 174.77, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9607414603233337, "eval_runtime": 3.9547, "eval_samples_per_second": 72.318, "eval_steps_per_second": 0.759, "step": 568 }, { "epoch": 176.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9589574933052063, "eval_runtime": 4.6913, "eval_samples_per_second": 60.963, "eval_steps_per_second": 0.639, "step": 572 }, { "epoch": 176.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9530745148658752, "eval_runtime": 4.4058, "eval_samples_per_second": 64.915, "eval_steps_per_second": 0.681, "step": 575 }, { "epoch": 177.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9497523903846741, "eval_runtime": 4.7135, "eval_samples_per_second": 60.677, "eval_steps_per_second": 0.636, "step": 578 }, { "epoch": 178.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9497215151786804, "eval_runtime": 4.021, "eval_samples_per_second": 71.127, "eval_steps_per_second": 0.746, "step": 581 }, { "epoch": 180.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9547012448310852, "eval_runtime": 4.666, "eval_samples_per_second": 61.295, "eval_steps_per_second": 0.643, "step": 585 }, { "epoch": 180.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9555142521858215, "eval_runtime": 4.1238, "eval_samples_per_second": 69.353, "eval_steps_per_second": 0.727, "step": 588 }, { "epoch": 181.85, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9561424255371094, "eval_runtime": 4.7157, "eval_samples_per_second": 60.648, "eval_steps_per_second": 0.636, "step": 591 }, { "epoch": 182.77, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9556129574775696, "eval_runtime": 4.9472, "eval_samples_per_second": 57.81, "eval_steps_per_second": 0.606, "step": 594 }, { "epoch": 184.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9521645307540894, "eval_runtime": 4.7357, "eval_samples_per_second": 60.393, "eval_steps_per_second": 0.633, "step": 598 }, { "epoch": 184.62, "grad_norm": 0.5078141689300537, "learning_rate": 1.4285714285714285e-05, "loss": 0.6609, "step": 600 }, { "epoch": 184.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9505414366722107, "eval_runtime": 4.6852, "eval_samples_per_second": 61.043, "eval_steps_per_second": 0.64, "step": 601 }, { "epoch": 185.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9508634805679321, "eval_runtime": 4.5463, "eval_samples_per_second": 62.908, "eval_steps_per_second": 0.66, "step": 604 }, { "epoch": 186.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9513095021247864, "eval_runtime": 4.6608, "eval_samples_per_second": 61.363, "eval_steps_per_second": 0.644, "step": 607 }, { "epoch": 188.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9521496891975403, "eval_runtime": 4.2761, "eval_samples_per_second": 66.883, "eval_steps_per_second": 0.702, "step": 611 }, { "epoch": 188.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9504989981651306, "eval_runtime": 4.2369, "eval_samples_per_second": 67.502, "eval_steps_per_second": 0.708, "step": 614 }, { "epoch": 189.85, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9491674304008484, "eval_runtime": 4.201, "eval_samples_per_second": 68.079, "eval_steps_per_second": 0.714, "step": 617 }, { "epoch": 190.77, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9477587938308716, "eval_runtime": 5.2893, "eval_samples_per_second": 54.071, "eval_steps_per_second": 0.567, "step": 620 }, { "epoch": 192.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9457892775535583, "eval_runtime": 4.1359, "eval_samples_per_second": 69.151, "eval_steps_per_second": 0.725, "step": 624 }, { "epoch": 192.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9427019953727722, "eval_runtime": 4.8485, "eval_samples_per_second": 58.987, "eval_steps_per_second": 0.619, "step": 627 }, { "epoch": 193.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9433549046516418, "eval_runtime": 4.4182, "eval_samples_per_second": 64.732, "eval_steps_per_second": 0.679, "step": 630 }, { "epoch": 194.77, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9444069862365723, "eval_runtime": 4.503, "eval_samples_per_second": 63.513, "eval_steps_per_second": 0.666, "step": 633 }, { "epoch": 196.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9477455615997314, "eval_runtime": 4.7047, "eval_samples_per_second": 60.79, "eval_steps_per_second": 0.638, "step": 637 }, { "epoch": 196.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.948028564453125, "eval_runtime": 4.6368, "eval_samples_per_second": 61.681, "eval_steps_per_second": 0.647, "step": 640 }, { "epoch": 197.85, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9453917741775513, "eval_runtime": 4.5374, "eval_samples_per_second": 63.032, "eval_steps_per_second": 0.661, "step": 643 }, { "epoch": 198.77, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.944375216960907, "eval_runtime": 4.3393, "eval_samples_per_second": 65.909, "eval_steps_per_second": 0.691, "step": 646 }, { "epoch": 200.0, "grad_norm": 0.6592119336128235, "learning_rate": 1.2698412698412699e-05, "loss": 0.6402, "step": 650 }, { "epoch": 200.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9393660426139832, "eval_runtime": 4.2917, "eval_samples_per_second": 66.64, "eval_steps_per_second": 0.699, "step": 650 }, { "epoch": 200.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9392969012260437, "eval_runtime": 4.6663, "eval_samples_per_second": 61.291, "eval_steps_per_second": 0.643, "step": 653 }, { "epoch": 201.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9408543705940247, "eval_runtime": 3.7675, "eval_samples_per_second": 75.912, "eval_steps_per_second": 0.796, "step": 656 }, { "epoch": 202.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9433931112289429, "eval_runtime": 4.0263, "eval_samples_per_second": 71.034, "eval_steps_per_second": 0.745, "step": 659 }, { "epoch": 204.0, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9422336220741272, "eval_runtime": 5.0827, "eval_samples_per_second": 56.27, "eval_steps_per_second": 0.59, "step": 663 }, { "epoch": 204.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9422111511230469, "eval_runtime": 4.3449, "eval_samples_per_second": 65.824, "eval_steps_per_second": 0.69, "step": 666 }, { "epoch": 205.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9415052533149719, "eval_runtime": 4.3877, "eval_samples_per_second": 65.183, "eval_steps_per_second": 0.684, "step": 669 }, { "epoch": 206.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9403449892997742, "eval_runtime": 4.5384, "eval_samples_per_second": 63.018, "eval_steps_per_second": 0.661, "step": 672 }, { "epoch": 208.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9443973898887634, "eval_runtime": 4.8712, "eval_samples_per_second": 58.713, "eval_steps_per_second": 0.616, "step": 676 }, { "epoch": 208.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9434468746185303, "eval_runtime": 4.5262, "eval_samples_per_second": 63.188, "eval_steps_per_second": 0.663, "step": 679 }, { "epoch": 209.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9392986297607422, "eval_runtime": 3.8813, "eval_samples_per_second": 73.687, "eval_steps_per_second": 0.773, "step": 682 }, { "epoch": 210.77, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9383959174156189, "eval_runtime": 4.0949, "eval_samples_per_second": 69.843, "eval_steps_per_second": 0.733, "step": 685 }, { "epoch": 212.0, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9405562281608582, "eval_runtime": 4.0071, "eval_samples_per_second": 71.374, "eval_steps_per_second": 0.749, "step": 689 }, { "epoch": 212.92, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9427996873855591, "eval_runtime": 4.4042, "eval_samples_per_second": 64.938, "eval_steps_per_second": 0.681, "step": 692 }, { "epoch": 213.85, "eval_accuracy": 0.6573426573426573, "eval_loss": 0.9420264363288879, "eval_runtime": 5.0628, "eval_samples_per_second": 56.491, "eval_steps_per_second": 0.593, "step": 695 }, { "epoch": 214.77, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9402673840522766, "eval_runtime": 4.5344, "eval_samples_per_second": 63.073, "eval_steps_per_second": 0.662, "step": 698 }, { "epoch": 215.38, "grad_norm": 0.41818705201148987, "learning_rate": 1.111111111111111e-05, "loss": 0.632, "step": 700 }, { "epoch": 216.0, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.939588189125061, "eval_runtime": 4.3905, "eval_samples_per_second": 65.141, "eval_steps_per_second": 0.683, "step": 702 }, { "epoch": 216.92, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9378303289413452, "eval_runtime": 3.949, "eval_samples_per_second": 72.423, "eval_steps_per_second": 0.76, "step": 705 }, { "epoch": 217.85, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9359939694404602, "eval_runtime": 6.0391, "eval_samples_per_second": 47.358, "eval_steps_per_second": 0.497, "step": 708 }, { "epoch": 218.77, "eval_accuracy": 0.6608391608391608, "eval_loss": 0.9351831078529358, "eval_runtime": 3.9064, "eval_samples_per_second": 73.213, "eval_steps_per_second": 0.768, "step": 711 }, { "epoch": 220.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9344322085380554, "eval_runtime": 4.2696, "eval_samples_per_second": 66.986, "eval_steps_per_second": 0.703, "step": 715 }, { "epoch": 220.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9372223615646362, "eval_runtime": 4.3125, "eval_samples_per_second": 66.319, "eval_steps_per_second": 0.696, "step": 718 }, { "epoch": 221.85, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9403982162475586, "eval_runtime": 5.843, "eval_samples_per_second": 48.948, "eval_steps_per_second": 0.513, "step": 721 }, { "epoch": 222.77, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9428540468215942, "eval_runtime": 4.6219, "eval_samples_per_second": 61.879, "eval_steps_per_second": 0.649, "step": 724 }, { "epoch": 224.0, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9426971673965454, "eval_runtime": 4.3056, "eval_samples_per_second": 66.425, "eval_steps_per_second": 0.697, "step": 728 }, { "epoch": 224.92, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9425584673881531, "eval_runtime": 4.1148, "eval_samples_per_second": 69.506, "eval_steps_per_second": 0.729, "step": 731 }, { "epoch": 225.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9411899447441101, "eval_runtime": 5.1345, "eval_samples_per_second": 55.702, "eval_steps_per_second": 0.584, "step": 734 }, { "epoch": 226.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9402187466621399, "eval_runtime": 4.509, "eval_samples_per_second": 63.428, "eval_steps_per_second": 0.665, "step": 737 }, { "epoch": 228.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9380680918693542, "eval_runtime": 4.4438, "eval_samples_per_second": 64.359, "eval_steps_per_second": 0.675, "step": 741 }, { "epoch": 228.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9378944635391235, "eval_runtime": 4.7137, "eval_samples_per_second": 60.674, "eval_steps_per_second": 0.636, "step": 744 }, { "epoch": 229.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9393661618232727, "eval_runtime": 4.3516, "eval_samples_per_second": 65.723, "eval_steps_per_second": 0.689, "step": 747 }, { "epoch": 230.77, "grad_norm": 0.43979376554489136, "learning_rate": 9.523809523809523e-06, "loss": 0.6285, "step": 750 }, { "epoch": 230.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9395984411239624, "eval_runtime": 4.7533, "eval_samples_per_second": 60.169, "eval_steps_per_second": 0.631, "step": 750 }, { "epoch": 232.0, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.943848729133606, "eval_runtime": 4.0413, "eval_samples_per_second": 70.769, "eval_steps_per_second": 0.742, "step": 754 }, { "epoch": 232.92, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9464450478553772, "eval_runtime": 4.5457, "eval_samples_per_second": 62.916, "eval_steps_per_second": 0.66, "step": 757 }, { "epoch": 233.85, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9500763416290283, "eval_runtime": 4.0761, "eval_samples_per_second": 70.166, "eval_steps_per_second": 0.736, "step": 760 }, { "epoch": 234.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9517615437507629, "eval_runtime": 4.7434, "eval_samples_per_second": 60.295, "eval_steps_per_second": 0.632, "step": 763 }, { "epoch": 236.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9502771496772766, "eval_runtime": 4.4044, "eval_samples_per_second": 64.935, "eval_steps_per_second": 0.681, "step": 767 }, { "epoch": 236.92, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9494647979736328, "eval_runtime": 4.5179, "eval_samples_per_second": 63.304, "eval_steps_per_second": 0.664, "step": 770 }, { "epoch": 237.85, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9487398862838745, "eval_runtime": 5.4124, "eval_samples_per_second": 52.842, "eval_steps_per_second": 0.554, "step": 773 }, { "epoch": 238.77, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9492418169975281, "eval_runtime": 4.4487, "eval_samples_per_second": 64.289, "eval_steps_per_second": 0.674, "step": 776 }, { "epoch": 240.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9463514685630798, "eval_runtime": 4.2462, "eval_samples_per_second": 67.354, "eval_steps_per_second": 0.707, "step": 780 }, { "epoch": 240.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9432539343833923, "eval_runtime": 4.4574, "eval_samples_per_second": 64.163, "eval_steps_per_second": 0.673, "step": 783 }, { "epoch": 241.85, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9403123259544373, "eval_runtime": 4.1413, "eval_samples_per_second": 69.06, "eval_steps_per_second": 0.724, "step": 786 }, { "epoch": 242.77, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9371182322502136, "eval_runtime": 4.0874, "eval_samples_per_second": 69.971, "eval_steps_per_second": 0.734, "step": 789 }, { "epoch": 244.0, "eval_accuracy": 0.6643356643356644, "eval_loss": 0.9386875033378601, "eval_runtime": 4.427, "eval_samples_per_second": 64.603, "eval_steps_per_second": 0.678, "step": 793 }, { "epoch": 244.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9423165321350098, "eval_runtime": 4.5541, "eval_samples_per_second": 62.8, "eval_steps_per_second": 0.659, "step": 796 }, { "epoch": 245.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9452247619628906, "eval_runtime": 4.3478, "eval_samples_per_second": 65.78, "eval_steps_per_second": 0.69, "step": 799 }, { "epoch": 246.15, "grad_norm": 0.8526151776313782, "learning_rate": 7.936507936507936e-06, "loss": 0.6049, "step": 800 }, { "epoch": 246.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9474557638168335, "eval_runtime": 4.2324, "eval_samples_per_second": 67.574, "eval_steps_per_second": 0.709, "step": 802 }, { "epoch": 248.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9469492435455322, "eval_runtime": 4.2172, "eval_samples_per_second": 67.817, "eval_steps_per_second": 0.711, "step": 806 }, { "epoch": 248.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9462881088256836, "eval_runtime": 3.7956, "eval_samples_per_second": 75.351, "eval_steps_per_second": 0.79, "step": 809 }, { "epoch": 249.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9461723566055298, "eval_runtime": 4.331, "eval_samples_per_second": 66.036, "eval_steps_per_second": 0.693, "step": 812 }, { "epoch": 250.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.945976972579956, "eval_runtime": 5.0364, "eval_samples_per_second": 56.787, "eval_steps_per_second": 0.596, "step": 815 }, { "epoch": 252.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9463274478912354, "eval_runtime": 4.1886, "eval_samples_per_second": 68.28, "eval_steps_per_second": 0.716, "step": 819 }, { "epoch": 252.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9467651844024658, "eval_runtime": 4.3006, "eval_samples_per_second": 66.502, "eval_steps_per_second": 0.698, "step": 822 }, { "epoch": 253.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9466995596885681, "eval_runtime": 4.7073, "eval_samples_per_second": 60.757, "eval_steps_per_second": 0.637, "step": 825 }, { "epoch": 254.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9466331005096436, "eval_runtime": 4.2578, "eval_samples_per_second": 67.171, "eval_steps_per_second": 0.705, "step": 828 }, { "epoch": 256.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9450873732566833, "eval_runtime": 4.0297, "eval_samples_per_second": 70.973, "eval_steps_per_second": 0.744, "step": 832 }, { "epoch": 256.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9441204071044922, "eval_runtime": 4.3682, "eval_samples_per_second": 65.474, "eval_steps_per_second": 0.687, "step": 835 }, { "epoch": 257.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9426379203796387, "eval_runtime": 4.5099, "eval_samples_per_second": 63.417, "eval_steps_per_second": 0.665, "step": 838 }, { "epoch": 258.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9439011216163635, "eval_runtime": 4.1513, "eval_samples_per_second": 68.894, "eval_steps_per_second": 0.723, "step": 841 }, { "epoch": 260.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9443875551223755, "eval_runtime": 4.5454, "eval_samples_per_second": 62.921, "eval_steps_per_second": 0.66, "step": 845 }, { "epoch": 260.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9435145258903503, "eval_runtime": 4.6513, "eval_samples_per_second": 61.489, "eval_steps_per_second": 0.645, "step": 848 }, { "epoch": 261.54, "grad_norm": 0.46575862169265747, "learning_rate": 6.349206349206349e-06, "loss": 0.6024, "step": 850 }, { "epoch": 261.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9441516399383545, "eval_runtime": 4.3022, "eval_samples_per_second": 66.478, "eval_steps_per_second": 0.697, "step": 851 }, { "epoch": 262.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9440596699714661, "eval_runtime": 4.2068, "eval_samples_per_second": 67.986, "eval_steps_per_second": 0.713, "step": 854 }, { "epoch": 264.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9449206590652466, "eval_runtime": 3.6546, "eval_samples_per_second": 78.258, "eval_steps_per_second": 0.821, "step": 858 }, { "epoch": 264.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9438286423683167, "eval_runtime": 4.4082, "eval_samples_per_second": 64.879, "eval_steps_per_second": 0.681, "step": 861 }, { "epoch": 265.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9423307180404663, "eval_runtime": 5.0619, "eval_samples_per_second": 56.5, "eval_steps_per_second": 0.593, "step": 864 }, { "epoch": 266.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9406384229660034, "eval_runtime": 3.9231, "eval_samples_per_second": 72.902, "eval_steps_per_second": 0.765, "step": 867 }, { "epoch": 268.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9399996399879456, "eval_runtime": 4.3437, "eval_samples_per_second": 65.843, "eval_steps_per_second": 0.691, "step": 871 }, { "epoch": 268.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9407250285148621, "eval_runtime": 4.1958, "eval_samples_per_second": 68.163, "eval_steps_per_second": 0.715, "step": 874 }, { "epoch": 269.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9427564740180969, "eval_runtime": 4.1649, "eval_samples_per_second": 68.668, "eval_steps_per_second": 0.72, "step": 877 }, { "epoch": 270.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9453551769256592, "eval_runtime": 4.1929, "eval_samples_per_second": 68.21, "eval_steps_per_second": 0.715, "step": 880 }, { "epoch": 272.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9466239213943481, "eval_runtime": 4.787, "eval_samples_per_second": 59.745, "eval_steps_per_second": 0.627, "step": 884 }, { "epoch": 272.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9471932649612427, "eval_runtime": 5.1674, "eval_samples_per_second": 55.347, "eval_steps_per_second": 0.581, "step": 887 }, { "epoch": 273.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9461983442306519, "eval_runtime": 4.2961, "eval_samples_per_second": 66.572, "eval_steps_per_second": 0.698, "step": 890 }, { "epoch": 274.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9463549852371216, "eval_runtime": 4.214, "eval_samples_per_second": 67.869, "eval_steps_per_second": 0.712, "step": 893 }, { "epoch": 276.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9452849626541138, "eval_runtime": 4.7011, "eval_samples_per_second": 60.836, "eval_steps_per_second": 0.638, "step": 897 }, { "epoch": 276.92, "grad_norm": 0.5298266410827637, "learning_rate": 4.7619047619047615e-06, "loss": 0.5966, "step": 900 }, { "epoch": 276.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.943517804145813, "eval_runtime": 4.7023, "eval_samples_per_second": 60.821, "eval_steps_per_second": 0.638, "step": 900 }, { "epoch": 277.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9417858123779297, "eval_runtime": 4.8013, "eval_samples_per_second": 59.568, "eval_steps_per_second": 0.625, "step": 903 }, { "epoch": 278.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9401264190673828, "eval_runtime": 4.2146, "eval_samples_per_second": 67.859, "eval_steps_per_second": 0.712, "step": 906 }, { "epoch": 280.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9374663829803467, "eval_runtime": 4.5877, "eval_samples_per_second": 62.34, "eval_steps_per_second": 0.654, "step": 910 }, { "epoch": 280.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9365966320037842, "eval_runtime": 4.7172, "eval_samples_per_second": 60.629, "eval_steps_per_second": 0.636, "step": 913 }, { "epoch": 281.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9357553720474243, "eval_runtime": 4.1128, "eval_samples_per_second": 69.539, "eval_steps_per_second": 0.729, "step": 916 }, { "epoch": 282.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9363517761230469, "eval_runtime": 4.5892, "eval_samples_per_second": 62.321, "eval_steps_per_second": 0.654, "step": 919 }, { "epoch": 284.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9368854761123657, "eval_runtime": 4.6233, "eval_samples_per_second": 61.86, "eval_steps_per_second": 0.649, "step": 923 }, { "epoch": 284.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9384199976921082, "eval_runtime": 4.5974, "eval_samples_per_second": 62.21, "eval_steps_per_second": 0.653, "step": 926 }, { "epoch": 285.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9411068558692932, "eval_runtime": 4.3443, "eval_samples_per_second": 65.834, "eval_steps_per_second": 0.691, "step": 929 }, { "epoch": 286.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9424038529396057, "eval_runtime": 4.1488, "eval_samples_per_second": 68.936, "eval_steps_per_second": 0.723, "step": 932 }, { "epoch": 288.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9443323016166687, "eval_runtime": 3.949, "eval_samples_per_second": 72.423, "eval_steps_per_second": 0.76, "step": 936 }, { "epoch": 288.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.945092499256134, "eval_runtime": 5.4756, "eval_samples_per_second": 52.232, "eval_steps_per_second": 0.548, "step": 939 }, { "epoch": 289.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9460843801498413, "eval_runtime": 4.3265, "eval_samples_per_second": 66.104, "eval_steps_per_second": 0.693, "step": 942 }, { "epoch": 290.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9465355277061462, "eval_runtime": 4.0418, "eval_samples_per_second": 70.761, "eval_steps_per_second": 0.742, "step": 945 }, { "epoch": 292.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9477852582931519, "eval_runtime": 4.5983, "eval_samples_per_second": 62.196, "eval_steps_per_second": 0.652, "step": 949 }, { "epoch": 292.31, "grad_norm": 0.48691806197166443, "learning_rate": 3.1746031746031746e-06, "loss": 0.5841, "step": 950 }, { "epoch": 292.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9479745626449585, "eval_runtime": 4.8468, "eval_samples_per_second": 59.008, "eval_steps_per_second": 0.619, "step": 952 }, { "epoch": 293.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9476760029792786, "eval_runtime": 4.2654, "eval_samples_per_second": 67.051, "eval_steps_per_second": 0.703, "step": 955 }, { "epoch": 294.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9466102719306946, "eval_runtime": 5.1835, "eval_samples_per_second": 55.175, "eval_steps_per_second": 0.579, "step": 958 }, { "epoch": 296.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9453703165054321, "eval_runtime": 4.2033, "eval_samples_per_second": 68.041, "eval_steps_per_second": 0.714, "step": 962 }, { "epoch": 296.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9448667764663696, "eval_runtime": 5.1456, "eval_samples_per_second": 55.582, "eval_steps_per_second": 0.583, "step": 965 }, { "epoch": 297.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.944092333316803, "eval_runtime": 4.8501, "eval_samples_per_second": 58.968, "eval_steps_per_second": 0.619, "step": 968 }, { "epoch": 298.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9439365267753601, "eval_runtime": 4.6304, "eval_samples_per_second": 61.765, "eval_steps_per_second": 0.648, "step": 971 }, { "epoch": 300.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9432730674743652, "eval_runtime": 4.2353, "eval_samples_per_second": 67.527, "eval_steps_per_second": 0.708, "step": 975 }, { "epoch": 300.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9432761669158936, "eval_runtime": 5.7923, "eval_samples_per_second": 49.376, "eval_steps_per_second": 0.518, "step": 978 }, { "epoch": 301.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9426748752593994, "eval_runtime": 5.2861, "eval_samples_per_second": 54.104, "eval_steps_per_second": 0.568, "step": 981 }, { "epoch": 302.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9422904253005981, "eval_runtime": 4.6604, "eval_samples_per_second": 61.368, "eval_steps_per_second": 0.644, "step": 984 }, { "epoch": 304.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.941644012928009, "eval_runtime": 4.5261, "eval_samples_per_second": 63.19, "eval_steps_per_second": 0.663, "step": 988 }, { "epoch": 304.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9412463903427124, "eval_runtime": 3.7658, "eval_samples_per_second": 75.946, "eval_steps_per_second": 0.797, "step": 991 }, { "epoch": 305.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9411738514900208, "eval_runtime": 4.5896, "eval_samples_per_second": 62.315, "eval_steps_per_second": 0.654, "step": 994 }, { "epoch": 306.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9409748315811157, "eval_runtime": 5.0951, "eval_samples_per_second": 56.133, "eval_steps_per_second": 0.589, "step": 997 }, { "epoch": 307.69, "grad_norm": 0.4341520071029663, "learning_rate": 1.5873015873015873e-06, "loss": 0.5913, "step": 1000 }, { "epoch": 308.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9409425258636475, "eval_runtime": 4.2421, "eval_samples_per_second": 67.419, "eval_steps_per_second": 0.707, "step": 1001 }, { "epoch": 308.92, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9412462115287781, "eval_runtime": 5.4761, "eval_samples_per_second": 52.227, "eval_steps_per_second": 0.548, "step": 1004 }, { "epoch": 309.85, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9415078163146973, "eval_runtime": 4.3021, "eval_samples_per_second": 66.479, "eval_steps_per_second": 0.697, "step": 1007 }, { "epoch": 310.77, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9418912529945374, "eval_runtime": 4.6753, "eval_samples_per_second": 61.173, "eval_steps_per_second": 0.642, "step": 1010 }, { "epoch": 312.0, "eval_accuracy": 0.6713286713286714, "eval_loss": 0.9425641298294067, "eval_runtime": 4.2216, "eval_samples_per_second": 67.746, "eval_steps_per_second": 0.711, "step": 1014 }, { "epoch": 312.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9430021047592163, "eval_runtime": 4.5627, "eval_samples_per_second": 62.682, "eval_steps_per_second": 0.658, "step": 1017 }, { "epoch": 313.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9434046149253845, "eval_runtime": 4.4071, "eval_samples_per_second": 64.896, "eval_steps_per_second": 0.681, "step": 1020 }, { "epoch": 314.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9436425566673279, "eval_runtime": 4.4183, "eval_samples_per_second": 64.731, "eval_steps_per_second": 0.679, "step": 1023 }, { "epoch": 316.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9438886046409607, "eval_runtime": 5.0213, "eval_samples_per_second": 56.958, "eval_steps_per_second": 0.597, "step": 1027 }, { "epoch": 316.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9438981413841248, "eval_runtime": 4.4087, "eval_samples_per_second": 64.872, "eval_steps_per_second": 0.68, "step": 1030 }, { "epoch": 317.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9438797831535339, "eval_runtime": 4.2787, "eval_samples_per_second": 66.843, "eval_steps_per_second": 0.701, "step": 1033 }, { "epoch": 318.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9440018534660339, "eval_runtime": 4.5412, "eval_samples_per_second": 62.979, "eval_steps_per_second": 0.661, "step": 1036 }, { "epoch": 320.0, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9439986348152161, "eval_runtime": 5.0195, "eval_samples_per_second": 56.978, "eval_steps_per_second": 0.598, "step": 1040 }, { "epoch": 320.92, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.944036602973938, "eval_runtime": 4.4758, "eval_samples_per_second": 63.899, "eval_steps_per_second": 0.67, "step": 1043 }, { "epoch": 321.85, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9441185593605042, "eval_runtime": 6.0328, "eval_samples_per_second": 47.408, "eval_steps_per_second": 0.497, "step": 1046 }, { "epoch": 322.77, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9441850781440735, "eval_runtime": 4.7198, "eval_samples_per_second": 60.596, "eval_steps_per_second": 0.636, "step": 1049 }, { "epoch": 323.08, "grad_norm": 0.7628626823425293, "learning_rate": 0.0, "loss": 0.5798, "step": 1050 }, { "epoch": 323.08, "eval_accuracy": 0.6678321678321678, "eval_loss": 0.9441931843757629, "eval_runtime": 4.692, "eval_samples_per_second": 60.955, "eval_steps_per_second": 0.639, "step": 1050 }, { "epoch": 323.08, "step": 1050, "total_flos": 4.380490432252032e+18, "train_loss": 0.8572167641775948, "train_runtime": 4856.1657, "train_samples_per_second": 111.57, "train_steps_per_second": 0.216 } ], "logging_steps": 50, "max_steps": 1050, "num_input_tokens_seen": 0, "num_train_epochs": 350, "save_steps": 500, "total_flos": 4.380490432252032e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }