{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 52190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.000999980839241234,
      "loss": 0.7834,
      "step": 1
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0009899980839241235,
      "loss": 0.7384,
      "step": 522
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.000979996167848247,
      "loss": 0.7597,
      "step": 1044
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0009699942517723703,
      "loss": 0.7401,
      "step": 1566
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0009599923356964936,
      "loss": 0.7495,
      "step": 2088
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.000949990419620617,
      "loss": 0.7017,
      "step": 2610
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0009399885035447404,
      "loss": 0.7398,
      "step": 3132
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0009299865874688638,
      "loss": 0.6949,
      "step": 3654
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0009199846713929871,
      "loss": 0.6335,
      "step": 4176
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0009099827553171107,
      "loss": 0.648,
      "step": 4698
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.000899980839241234,
      "loss": 0.6445,
      "step": 5220
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0008899789231653573,
      "loss": 0.6429,
      "step": 5742
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0008799770070894808,
      "loss": 0.6316,
      "step": 6264
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0008699750910136041,
      "loss": 0.6066,
      "step": 6786
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0008599731749377275,
      "loss": 0.6397,
      "step": 7308
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0008499712588618509,
      "loss": 0.6243,
      "step": 7830
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0008399693427859744,
      "loss": 0.6271,
      "step": 8352
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0008299674267100977,
      "loss": 0.614,
      "step": 8874
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0008199655106342211,
      "loss": 0.6358,
      "step": 9396
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0008099635945583445,
      "loss": 0.633,
      "step": 9918
    },
    {
      "epoch": 1.0,
      "eval_Macro F1": 0.5744881635875178,
      "eval_Macro Precision": 0.6541570568719827,
      "eval_Macro Recall": 0.564346253205228,
      "eval_Micro F1": 0.826105701864693,
      "eval_Micro Precision": 0.826105701864693,
      "eval_Micro Recall": 0.826105701864693,
      "eval_Weighted F1": 0.7913544313349856,
      "eval_Weighted Precision": 0.7843732666581203,
      "eval_Weighted Recall": 0.826105701864693,
      "eval_accuracy": 0.826105701864693,
      "eval_loss": 0.5608153343200684,
      "eval_runtime": 2136.2944,
      "eval_samples_per_second": 13.029,
      "eval_steps_per_second": 1.629,
      "step": 10438
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0007999616784824679,
      "loss": 0.6139,
      "step": 10440
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.0007899597624065913,
      "loss": 0.5945,
      "step": 10962
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.0007799578463307148,
      "loss": 0.6018,
      "step": 11484
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.0007699559302548381,
      "loss": 0.597,
      "step": 12006
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0007599540141789615,
      "loss": 0.5891,
      "step": 12528
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.0007499520981030848,
      "loss": 0.6139,
      "step": 13050
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.0007399501820272084,
      "loss": 0.5878,
      "step": 13572
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.0007299482659513317,
      "loss": 0.6183,
      "step": 14094
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.0007199463498754551,
      "loss": 0.6038,
      "step": 14616
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0007099444337995785,
      "loss": 0.6506,
      "step": 15138
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.0006999425177237019,
      "loss": 0.6063,
      "step": 15660
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.0006899406016478252,
      "loss": 0.5931,
      "step": 16182
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0006799386855719488,
      "loss": 0.5828,
      "step": 16704
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.0006699367694960721,
      "loss": 0.5912,
      "step": 17226
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.0006599348534201954,
      "loss": 0.6105,
      "step": 17748
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0006499329373443188,
      "loss": 0.5893,
      "step": 18270
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.0006399310212684422,
      "loss": 0.611,
      "step": 18792
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0006299291051925656,
      "loss": 0.6371,
      "step": 19314
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.000619927189116689,
      "loss": 0.5925,
      "step": 19836
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0006099252730408125,
      "loss": 0.6029,
      "step": 20358
    },
    {
      "epoch": 2.0,
      "eval_Macro F1": 0.5060450221643912,
      "eval_Macro Precision": 0.6929448637916937,
      "eval_Macro Recall": 0.5239477514921818,
      "eval_Micro F1": 0.8330758452197032,
      "eval_Micro Precision": 0.8330758452197032,
      "eval_Micro Recall": 0.8330758452197032,
      "eval_Weighted F1": 0.7724263350491593,
      "eval_Weighted Precision": 0.7892310862523967,
      "eval_Weighted Recall": 0.8330758452197032,
      "eval_accuracy": 0.8330758452197032,
      "eval_loss": 0.6489848494529724,
      "eval_runtime": 1320.9895,
      "eval_samples_per_second": 21.07,
      "eval_steps_per_second": 2.634,
      "step": 20876
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.0005999233569649358,
      "loss": 0.5882,
      "step": 20880
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.0005899214408890592,
      "loss": 0.5831,
      "step": 21402
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.0005799195248131826,
      "loss": 0.5755,
      "step": 21924
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.000569917608737306,
      "loss": 0.5822,
      "step": 22446
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.0005599156926614294,
      "loss": 0.5632,
      "step": 22968
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.0005499137765855528,
      "loss": 0.5756,
      "step": 23490
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.0005399118605096762,
      "loss": 0.6018,
      "step": 24012
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.0005299099444337996,
      "loss": 0.5978,
      "step": 24534
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.0005199080283579229,
      "loss": 0.5767,
      "step": 25056
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.0005099061122820464,
      "loss": 0.5538,
      "step": 25578
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.0004999041962061698,
      "loss": 0.5621,
      "step": 26100
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0004899022801302932,
      "loss": 0.594,
      "step": 26622
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.00047990036405441656,
      "loss": 0.5616,
      "step": 27144
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.00046989844797853996,
      "loss": 0.5444,
      "step": 27666
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.00045989653190266336,
      "loss": 0.5558,
      "step": 28188
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.00044989461582678675,
      "loss": 0.5415,
      "step": 28710
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.0004398926997509101,
      "loss": 0.5452,
      "step": 29232
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.00042989078367503355,
      "loss": 0.5595,
      "step": 29754
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.00041988886759915695,
      "loss": 0.565,
      "step": 30276
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0004098869515232803,
      "loss": 0.5478,
      "step": 30798
    },
    {
      "epoch": 3.0,
      "eval_Macro F1": 0.6188634906559709,
      "eval_Macro Precision": 0.6783819638851676,
      "eval_Macro Recall": 0.6003398559173716,
      "eval_Micro F1": 0.8304530593180757,
      "eval_Micro Precision": 0.8304530593180757,
      "eval_Micro Recall": 0.8304530593180757,
      "eval_Weighted F1": 0.8070780134214183,
      "eval_Weighted Precision": 0.8001960025950923,
      "eval_Weighted Recall": 0.8304530593180757,
      "eval_accuracy": 0.8304530593180757,
      "eval_loss": 0.5508156418800354,
      "eval_runtime": 1285.8765,
      "eval_samples_per_second": 21.645,
      "eval_steps_per_second": 2.706,
      "step": 31314
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.00039988503544740374,
      "loss": 0.5593,
      "step": 31320
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.0003898831193715271,
      "loss": 0.5348,
      "step": 31842
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.0003798812032956505,
      "loss": 0.5459,
      "step": 32364
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.00036987928721977394,
      "loss": 0.5544,
      "step": 32886
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0003598773711438973,
      "loss": 0.5442,
      "step": 33408
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.00034987545506802074,
      "loss": 0.542,
      "step": 33930
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.0003398735389921441,
      "loss": 0.5325,
      "step": 34452
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.0003298716229162675,
      "loss": 0.5525,
      "step": 34974
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.00031986970684039093,
      "loss": 0.5547,
      "step": 35496
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.0003098677907645143,
      "loss": 0.5484,
      "step": 36018
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.00029986587468863767,
      "loss": 0.5368,
      "step": 36540
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.00028986395861276107,
      "loss": 0.5487,
      "step": 37062
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.00027986204253688447,
      "loss": 0.5403,
      "step": 37584
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.00026986012646100787,
      "loss": 0.5459,
      "step": 38106
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.00025985821038513126,
      "loss": 0.5376,
      "step": 38628
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.00024985629430925466,
      "loss": 0.5186,
      "step": 39150
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.00023985437823337806,
      "loss": 0.5371,
      "step": 39672
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.00022985246215750143,
      "loss": 0.5413,
      "step": 40194
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.00021985054608162483,
      "loss": 0.5328,
      "step": 40716
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.00020984863000574823,
      "loss": 0.513,
      "step": 41238
    },
    {
      "epoch": 4.0,
      "eval_Macro F1": 0.6224111150035051,
      "eval_Macro Precision": 0.6915564589610266,
      "eval_Macro Recall": 0.60231662265339,
      "eval_Micro F1": 0.8347285596234686,
      "eval_Micro Precision": 0.8347285596234686,
      "eval_Micro Recall": 0.8347285596234686,
      "eval_Weighted F1": 0.8100695837354521,
      "eval_Weighted Precision": 0.8049325371288767,
      "eval_Weighted Recall": 0.8347285596234686,
      "eval_accuracy": 0.8347285596234686,
      "eval_loss": 0.5459285974502563,
      "eval_runtime": 1277.4842,
      "eval_samples_per_second": 21.787,
      "eval_steps_per_second": 2.724,
      "step": 41752
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00019984671392987163,
      "loss": 0.5375,
      "step": 41760
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.00018984479785399502,
      "loss": 0.5505,
      "step": 42282
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.00017984288177811842,
      "loss": 0.5258,
      "step": 42804
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.00016984096570224182,
      "loss": 0.5132,
      "step": 43326
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.0001598390496263652,
      "loss": 0.5182,
      "step": 43848
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.00014983713355048862,
      "loss": 0.5147,
      "step": 44370
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.00013983521747461201,
      "loss": 0.5364,
      "step": 44892
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.00012983330139873538,
      "loss": 0.53,
      "step": 45414
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.00011983138532285878,
      "loss": 0.5288,
      "step": 45936
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.00010982946924698218,
      "loss": 0.5087,
      "step": 46458
    },
    {
      "epoch": 4.5,
      "learning_rate": 9.982755317110558e-05,
      "loss": 0.5101,
      "step": 46980
    },
    {
      "epoch": 4.55,
      "learning_rate": 8.982563709522898e-05,
      "loss": 0.5235,
      "step": 47502
    },
    {
      "epoch": 4.6,
      "learning_rate": 7.982372101935238e-05,
      "loss": 0.5143,
      "step": 48024
    },
    {
      "epoch": 4.65,
      "learning_rate": 6.982180494347576e-05,
      "loss": 0.5299,
      "step": 48546
    },
    {
      "epoch": 4.7,
      "learning_rate": 5.981988886759916e-05,
      "loss": 0.5362,
      "step": 49068
    },
    {
      "epoch": 4.75,
      "learning_rate": 4.981797279172255e-05,
      "loss": 0.5186,
      "step": 49590
    },
    {
      "epoch": 4.8,
      "learning_rate": 3.981605671584595e-05,
      "loss": 0.5256,
      "step": 50112
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.9814140639969346e-05,
      "loss": 0.5233,
      "step": 50634
    },
    {
      "epoch": 4.9,
      "learning_rate": 1.9812224564092737e-05,
      "loss": 0.5263,
      "step": 51156
    },
    {
      "epoch": 4.95,
      "learning_rate": 9.810308488216133e-06,
      "loss": 0.5288,
      "step": 51678
    },
    {
      "epoch": 5.0,
      "eval_Macro F1": 0.6307998158823078,
      "eval_Macro Precision": 0.7029273840489014,
      "eval_Macro Recall": 0.6089557840149206,
      "eval_Micro F1": 0.8381417741529839,
      "eval_Micro Precision": 0.8381417741529839,
      "eval_Micro Recall": 0.8381417741529839,
      "eval_Weighted F1": 0.8141748808079556,
      "eval_Weighted Precision": 0.810121498718634,
      "eval_Weighted Recall": 0.8381417741529839,
      "eval_accuracy": 0.8381417741529839,
      "eval_loss": 0.5336272716522217,
      "eval_runtime": 1277.7048,
      "eval_samples_per_second": 21.784,
      "eval_steps_per_second": 2.724,
      "step": 52190
    },
    {
      "epoch": 5.0,
      "step": 52190,
      "total_flos": 2.1040687845486864e+16,
      "train_loss": 0.5799241374931839,
      "train_runtime": 32322.6365,
      "train_samples_per_second": 12.916,
      "train_steps_per_second": 1.615
    }
  ],
  "max_steps": 52190,
  "num_train_epochs": 5,
  "total_flos": 2.1040687845486864e+16,
  "trial_name": null,
  "trial_params": null
}