{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2846975088967972,
  "eval_steps": 500,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0035587188612099642,
      "grad_norm": 3.714656812928013,
      "learning_rate": 3.521126760563381e-06,
      "loss": 1.2324,
      "mean_token_accuracy": 0.706312108039856,
      "step": 5
    },
    {
      "epoch": 0.0071174377224199285,
      "grad_norm": 2.538884412766077,
      "learning_rate": 7.042253521126762e-06,
      "loss": 1.1399,
      "mean_token_accuracy": 0.7131082773208618,
      "step": 10
    },
    {
      "epoch": 0.010676156583629894,
      "grad_norm": 1.7771129703443833,
      "learning_rate": 1.056338028169014e-05,
      "loss": 0.9862,
      "mean_token_accuracy": 0.7257840037345886,
      "step": 15
    },
    {
      "epoch": 0.014234875444839857,
      "grad_norm": 1.1549668455493816,
      "learning_rate": 1.4084507042253523e-05,
      "loss": 0.8305,
      "mean_token_accuracy": 0.7594800233840943,
      "step": 20
    },
    {
      "epoch": 0.017793594306049824,
      "grad_norm": 0.8084059187346242,
      "learning_rate": 1.7605633802816902e-05,
      "loss": 0.7989,
      "mean_token_accuracy": 0.7640575289726257,
      "step": 25
    },
    {
      "epoch": 0.021352313167259787,
      "grad_norm": 0.8267562843107086,
      "learning_rate": 2.112676056338028e-05,
      "loss": 0.7668,
      "mean_token_accuracy": 0.7727443218231201,
      "step": 30
    },
    {
      "epoch": 0.02491103202846975,
      "grad_norm": 0.7851378820161544,
      "learning_rate": 2.4647887323943664e-05,
      "loss": 0.7649,
      "mean_token_accuracy": 0.7709716081619262,
      "step": 35
    },
    {
      "epoch": 0.028469750889679714,
      "grad_norm": 0.7637956379243507,
      "learning_rate": 2.8169014084507046e-05,
      "loss": 0.7394,
      "mean_token_accuracy": 0.7772023797035217,
      "step": 40
    },
    {
      "epoch": 0.03202846975088968,
      "grad_norm": 0.7732620068824387,
      "learning_rate": 3.1690140845070426e-05,
      "loss": 0.7346,
      "mean_token_accuracy": 0.779318380355835,
      "step": 45
    },
    {
      "epoch": 0.03558718861209965,
      "grad_norm": 0.7696301616562734,
      "learning_rate": 3.5211267605633805e-05,
      "loss": 0.7206,
      "mean_token_accuracy": 0.7811890006065368,
      "step": 50
    },
    {
      "epoch": 0.03914590747330961,
      "grad_norm": 0.7965650840150478,
      "learning_rate": 3.8732394366197184e-05,
      "loss": 0.7099,
      "mean_token_accuracy": 0.7832624912261963,
      "step": 55
    },
    {
      "epoch": 0.042704626334519574,
      "grad_norm": 0.732913212165206,
      "learning_rate": 4.225352112676056e-05,
      "loss": 0.7365,
      "mean_token_accuracy": 0.778208339214325,
      "step": 60
    },
    {
      "epoch": 0.046263345195729534,
      "grad_norm": 0.840463903142822,
      "learning_rate": 4.577464788732395e-05,
      "loss": 0.7158,
      "mean_token_accuracy": 0.7836069345474244,
      "step": 65
    },
    {
      "epoch": 0.0498220640569395,
      "grad_norm": 0.7796733146584423,
      "learning_rate": 4.929577464788733e-05,
      "loss": 0.6948,
      "mean_token_accuracy": 0.7882483005523682,
      "step": 70
    },
    {
      "epoch": 0.05338078291814947,
      "grad_norm": 0.7826425373484522,
      "learning_rate": 4.999900170848507e-05,
      "loss": 0.7299,
      "mean_token_accuracy": 0.7795511484146118,
      "step": 75
    },
    {
      "epoch": 0.05693950177935943,
      "grad_norm": 0.7664471564553723,
      "learning_rate": 4.9994946301028825e-05,
      "loss": 0.7205,
      "mean_token_accuracy": 0.7806410670280457,
      "step": 80
    },
    {
      "epoch": 0.060498220640569395,
      "grad_norm": 0.6968982746230619,
      "learning_rate": 4.99877719462654e-05,
      "loss": 0.7217,
      "mean_token_accuracy": 0.7813021302223205,
      "step": 85
    },
    {
      "epoch": 0.06405693950177936,
      "grad_norm": 0.7093605930206083,
      "learning_rate": 4.997747963892645e-05,
      "loss": 0.7337,
      "mean_token_accuracy": 0.7769145965576172,
      "step": 90
    },
    {
      "epoch": 0.06761565836298933,
      "grad_norm": 0.7406894880280147,
      "learning_rate": 4.99640708060509e-05,
      "loss": 0.7337,
      "mean_token_accuracy": 0.7780750393867493,
      "step": 95
    },
    {
      "epoch": 0.0711743772241993,
      "grad_norm": 0.7318985545441932,
      "learning_rate": 4.994754730678713e-05,
      "loss": 0.727,
      "mean_token_accuracy": 0.7799831032752991,
      "step": 100
    },
    {
      "epoch": 0.07473309608540925,
      "grad_norm": 0.7639016380490891,
      "learning_rate": 4.992791143213523e-05,
      "loss": 0.7247,
      "mean_token_accuracy": 0.7811060786247254,
      "step": 105
    },
    {
      "epoch": 0.07829181494661921,
      "grad_norm": 0.8011798663105023,
      "learning_rate": 4.990516590462928e-05,
      "loss": 0.7161,
      "mean_token_accuracy": 0.7826329588890075,
      "step": 110
    },
    {
      "epoch": 0.08185053380782918,
      "grad_norm": 0.7616291847324664,
      "learning_rate": 4.9879313877959934e-05,
      "loss": 0.7172,
      "mean_token_accuracy": 0.7815984845161438,
      "step": 115
    },
    {
      "epoch": 0.08540925266903915,
      "grad_norm": 0.6970328489587734,
      "learning_rate": 4.985035893653713e-05,
      "loss": 0.6988,
      "mean_token_accuracy": 0.7884742975234985,
      "step": 120
    },
    {
      "epoch": 0.08896797153024912,
      "grad_norm": 0.6887071171016819,
      "learning_rate": 4.9818305094993096e-05,
      "loss": 0.7252,
      "mean_token_accuracy": 0.7797473907470703,
      "step": 125
    },
    {
      "epoch": 0.09252669039145907,
      "grad_norm": 0.782855999297394,
      "learning_rate": 4.978315679762574e-05,
      "loss": 0.7182,
      "mean_token_accuracy": 0.7811893105506897,
      "step": 130
    },
    {
      "epoch": 0.09608540925266904,
      "grad_norm": 0.6962527878171872,
      "learning_rate": 4.9744918917782446e-05,
      "loss": 0.7274,
      "mean_token_accuracy": 0.7792230010032654,
      "step": 135
    },
    {
      "epoch": 0.099644128113879,
      "grad_norm": 0.6966111313941732,
      "learning_rate": 4.9703596757184346e-05,
      "loss": 0.6958,
      "mean_token_accuracy": 0.78841632604599,
      "step": 140
    },
    {
      "epoch": 0.10320284697508897,
      "grad_norm": 0.6965295119485749,
      "learning_rate": 4.965919604519125e-05,
      "loss": 0.7158,
      "mean_token_accuracy": 0.782188069820404,
      "step": 145
    },
    {
      "epoch": 0.10676156583629894,
      "grad_norm": 0.7008145512070334,
      "learning_rate": 4.96117229380073e-05,
      "loss": 0.7311,
      "mean_token_accuracy": 0.7790884852409363,
      "step": 150
    },
    {
      "epoch": 0.1103202846975089,
      "grad_norm": 0.7210254246058475,
      "learning_rate": 4.956118401782734e-05,
      "loss": 0.6856,
      "mean_token_accuracy": 0.7896694540977478,
      "step": 155
    },
    {
      "epoch": 0.11387900355871886,
      "grad_norm": 0.7230055796086301,
      "learning_rate": 4.950758629192433e-05,
      "loss": 0.7205,
      "mean_token_accuracy": 0.7811832308769227,
      "step": 160
    },
    {
      "epoch": 0.11743772241992882,
      "grad_norm": 0.7177778907287135,
      "learning_rate": 4.945093719167778e-05,
      "loss": 0.7033,
      "mean_token_accuracy": 0.7855350494384765,
      "step": 165
    },
    {
      "epoch": 0.12099644128113879,
      "grad_norm": 0.6993140429254685,
      "learning_rate": 4.939124457154336e-05,
      "loss": 0.716,
      "mean_token_accuracy": 0.7823803782463074,
      "step": 170
    },
    {
      "epoch": 0.12455516014234876,
      "grad_norm": 0.749036299071456,
      "learning_rate": 4.932851670796389e-05,
      "loss": 0.6944,
      "mean_token_accuracy": 0.78707515001297,
      "step": 175
    },
    {
      "epoch": 0.12811387900355872,
      "grad_norm": 0.8003534588080977,
      "learning_rate": 4.926276229822181e-05,
      "loss": 0.7039,
      "mean_token_accuracy": 0.7855878114700318,
      "step": 180
    },
    {
      "epoch": 0.13167259786476868,
      "grad_norm": 0.6511982930622267,
      "learning_rate": 4.919399045923326e-05,
      "loss": 0.7046,
      "mean_token_accuracy": 0.7856501579284668,
      "step": 185
    },
    {
      "epoch": 0.13523131672597866,
      "grad_norm": 0.6631202589315934,
      "learning_rate": 4.9122210726284046e-05,
      "loss": 0.6918,
      "mean_token_accuracy": 0.789514684677124,
      "step": 190
    },
    {
      "epoch": 0.1387900355871886,
      "grad_norm": 0.6877007898563937,
      "learning_rate": 4.904743305170753e-05,
      "loss": 0.6973,
      "mean_token_accuracy": 0.7876662492752076,
      "step": 195
    },
    {
      "epoch": 0.1423487544483986,
      "grad_norm": 0.6594871153113472,
      "learning_rate": 4.896966780350477e-05,
      "loss": 0.7106,
      "mean_token_accuracy": 0.7843179941177368,
      "step": 200
    },
    {
      "epoch": 0.14590747330960854,
      "grad_norm": 0.7077337595180323,
      "learning_rate": 4.888892576390694e-05,
      "loss": 0.7124,
      "mean_token_accuracy": 0.7831673264503479,
      "step": 205
    },
    {
      "epoch": 0.1494661921708185,
      "grad_norm": 0.6734240121888417,
      "learning_rate": 4.88052181278804e-05,
      "loss": 0.6822,
      "mean_token_accuracy": 0.7912026405334472,
      "step": 210
    },
    {
      "epoch": 0.15302491103202848,
      "grad_norm": 0.6209145189032446,
      "learning_rate": 4.871855650157446e-05,
      "loss": 0.7311,
      "mean_token_accuracy": 0.779719889163971,
      "step": 215
    },
    {
      "epoch": 0.15658362989323843,
      "grad_norm": 0.6857572196473167,
      "learning_rate": 4.8628952900712265e-05,
      "loss": 0.7039,
      "mean_token_accuracy": 0.7850899338722229,
      "step": 220
    },
    {
      "epoch": 0.1601423487544484,
      "grad_norm": 0.721269601531556,
      "learning_rate": 4.853641974892466e-05,
      "loss": 0.6751,
      "mean_token_accuracy": 0.7923677682876586,
      "step": 225
    },
    {
      "epoch": 0.16370106761565836,
      "grad_norm": 0.6569686267689558,
      "learning_rate": 4.8440969876027794e-05,
      "loss": 0.6902,
      "mean_token_accuracy": 0.7885013699531556,
      "step": 230
    },
    {
      "epoch": 0.16725978647686832,
      "grad_norm": 0.652284057104664,
      "learning_rate": 4.834261651624412e-05,
      "loss": 0.7002,
      "mean_token_accuracy": 0.7859378337860108,
      "step": 235
    },
    {
      "epoch": 0.1708185053380783,
      "grad_norm": 0.6985712892311762,
      "learning_rate": 4.824137330636756e-05,
      "loss": 0.6937,
      "mean_token_accuracy": 0.7884337782859803,
      "step": 240
    },
    {
      "epoch": 0.17437722419928825,
      "grad_norm": 0.6981291268835246,
      "learning_rate": 4.8137254283872696e-05,
      "loss": 0.706,
      "mean_token_accuracy": 0.7852147102355957,
      "step": 245
    },
    {
      "epoch": 0.17793594306049823,
      "grad_norm": 0.6830081055108704,
      "learning_rate": 4.803027388496845e-05,
      "loss": 0.6813,
      "mean_token_accuracy": 0.7909272313117981,
      "step": 250
    },
    {
      "epoch": 0.18149466192170818,
      "grad_norm": 0.6763389483738134,
      "learning_rate": 4.7920446942596535e-05,
      "loss": 0.7104,
      "mean_token_accuracy": 0.7841034054756164,
      "step": 255
    },
    {
      "epoch": 0.18505338078291814,
      "grad_norm": 0.6758095067389677,
      "learning_rate": 4.780778868437481e-05,
      "loss": 0.6912,
      "mean_token_accuracy": 0.7872965097427368,
      "step": 260
    },
    {
      "epoch": 0.18861209964412812,
      "grad_norm": 0.6654497461754171,
      "learning_rate": 4.769231473048598e-05,
      "loss": 0.7035,
      "mean_token_accuracy": 0.7856454253196716,
      "step": 265
    },
    {
      "epoch": 0.19217081850533807,
      "grad_norm": 0.6692448464596932,
      "learning_rate": 4.757404109151184e-05,
      "loss": 0.6956,
      "mean_token_accuracy": 0.7881039142608642,
      "step": 270
    },
    {
      "epoch": 0.19572953736654805,
      "grad_norm": 0.6384295872220533,
      "learning_rate": 4.745298416621336e-05,
      "loss": 0.6941,
      "mean_token_accuracy": 0.7865496397018432,
      "step": 275
    },
    {
      "epoch": 0.199288256227758,
      "grad_norm": 0.6917569136927582,
      "learning_rate": 4.7329160739257035e-05,
      "loss": 0.6653,
      "mean_token_accuracy": 0.7947100758552551,
      "step": 280
    },
    {
      "epoch": 0.20284697508896798,
      "grad_norm": 0.6237053314088369,
      "learning_rate": 4.720258797888762e-05,
      "loss": 0.6954,
      "mean_token_accuracy": 0.787773597240448,
      "step": 285
    },
    {
      "epoch": 0.20640569395017794,
      "grad_norm": 0.6404989768357283,
      "learning_rate": 4.707328343454777e-05,
      "loss": 0.7143,
      "mean_token_accuracy": 0.7825374364852905,
      "step": 290
    },
    {
      "epoch": 0.2099644128113879,
      "grad_norm": 0.6609383386205755,
      "learning_rate": 4.694126503444479e-05,
      "loss": 0.7104,
      "mean_token_accuracy": 0.7829306364059448,
      "step": 295
    },
    {
      "epoch": 0.21352313167259787,
      "grad_norm": 0.6390075364325144,
      "learning_rate": 4.680655108306484e-05,
      "loss": 0.7068,
      "mean_token_accuracy": 0.7846204996109009,
      "step": 300
    },
    {
      "epoch": 0.21708185053380782,
      "grad_norm": 0.7320785611015472,
      "learning_rate": 4.666916025863505e-05,
      "loss": 0.6866,
      "mean_token_accuracy": 0.7891976118087769,
      "step": 305
    },
    {
      "epoch": 0.2206405693950178,
      "grad_norm": 0.6542388529582787,
      "learning_rate": 4.652911161053369e-05,
      "loss": 0.6684,
      "mean_token_accuracy": 0.7946648597717285,
      "step": 310
    },
    {
      "epoch": 0.22419928825622776,
      "grad_norm": 0.6387809858585279,
      "learning_rate": 4.6386424556649046e-05,
      "loss": 0.7067,
      "mean_token_accuracy": 0.784406590461731,
      "step": 315
    },
    {
      "epoch": 0.2277580071174377,
      "grad_norm": 0.6511518481529157,
      "learning_rate": 4.624111888068704e-05,
      "loss": 0.6669,
      "mean_token_accuracy": 0.7934553146362304,
      "step": 320
    },
    {
      "epoch": 0.2313167259786477,
      "grad_norm": 0.7857269281531926,
      "learning_rate": 4.6093214729428236e-05,
      "loss": 0.6977,
      "mean_token_accuracy": 0.7871865391731262,
      "step": 325
    },
    {
      "epoch": 0.23487544483985764,
      "grad_norm": 0.708401390320286,
      "learning_rate": 4.5942732609934436e-05,
      "loss": 0.6917,
      "mean_token_accuracy": 0.7883163809776306,
      "step": 330
    },
    {
      "epoch": 0.23843416370106763,
      "grad_norm": 0.6537859721831119,
      "learning_rate": 4.57896933867054e-05,
      "loss": 0.7016,
      "mean_token_accuracy": 0.785762631893158,
      "step": 335
    },
    {
      "epoch": 0.24199288256227758,
      "grad_norm": 0.6322721246524066,
      "learning_rate": 4.563411827878591e-05,
      "loss": 0.6591,
      "mean_token_accuracy": 0.7976097822189331,
      "step": 340
    },
    {
      "epoch": 0.24555160142348753,
      "grad_norm": 0.613702823629008,
      "learning_rate": 4.5476028856823774e-05,
      "loss": 0.6831,
      "mean_token_accuracy": 0.7909620523452758,
      "step": 345
    },
    {
      "epoch": 0.2491103202846975,
      "grad_norm": 0.6470379557892744,
      "learning_rate": 4.531544704007899e-05,
      "loss": 0.6877,
      "mean_token_accuracy": 0.7888366222381592,
      "step": 350
    },
    {
      "epoch": 0.2526690391459075,
      "grad_norm": 0.6349334832549038,
      "learning_rate": 4.5152395093384655e-05,
      "loss": 0.6855,
      "mean_token_accuracy": 0.7899898767471314,
      "step": 355
    },
    {
      "epoch": 0.25622775800711745,
      "grad_norm": 0.6462491719466713,
      "learning_rate": 4.4986895624059934e-05,
      "loss": 0.6932,
      "mean_token_accuracy": 0.7873799800872803,
      "step": 360
    },
    {
      "epoch": 0.2597864768683274,
      "grad_norm": 0.630603740544669,
      "learning_rate": 4.481897157877545e-05,
      "loss": 0.709,
      "mean_token_accuracy": 0.7845431327819824,
      "step": 365
    },
    {
      "epoch": 0.26334519572953735,
      "grad_norm": 0.6086861709084705,
      "learning_rate": 4.464864624037182e-05,
      "loss": 0.6825,
      "mean_token_accuracy": 0.7913990497589112,
      "step": 370
    },
    {
      "epoch": 0.2669039145907473,
      "grad_norm": 1.2548196206715339,
      "learning_rate": 4.447594322463137e-05,
      "loss": 0.6954,
      "mean_token_accuracy": 0.7860358953475952,
      "step": 375
    },
    {
      "epoch": 0.2704626334519573,
      "grad_norm": 0.7181263405100091,
      "learning_rate": 4.4300886477003836e-05,
      "loss": 0.6813,
      "mean_token_accuracy": 0.7917158126831054,
      "step": 380
    },
    {
      "epoch": 0.27402135231316727,
      "grad_norm": 0.6711130910236095,
      "learning_rate": 4.412350026928628e-05,
      "loss": 0.6896,
      "mean_token_accuracy": 0.7883881092071533,
      "step": 385
    },
    {
      "epoch": 0.2775800711743772,
      "grad_norm": 0.6035405028151128,
      "learning_rate": 4.3943809196257794e-05,
      "loss": 0.6848,
      "mean_token_accuracy": 0.7895100355148316,
      "step": 390
    },
    {
      "epoch": 0.28113879003558717,
      "grad_norm": 0.5844919307530534,
      "learning_rate": 4.37618381722694e-05,
      "loss": 0.6668,
      "mean_token_accuracy": 0.7936931371688842,
      "step": 395
    },
    {
      "epoch": 0.2846975088967972,
      "grad_norm": 0.6492460475437084,
      "learning_rate": 4.357761242778965e-05,
      "loss": 0.6684,
      "mean_token_accuracy": 0.7950313925743103,
      "step": 400
    },
    {
      "epoch": 0.2846975088967972,
      "step": 400,
      "total_flos": 38010460569600.0,
      "train_loss": 0.0,
      "train_runtime": 2.1351,
      "train_samples_per_second": 2851.346,
      "train_steps_per_second": 44.962
    }
  ],
  "logging_steps": 5,
  "max_steps": 96,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 38010460569600.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}