| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9944341372912802, | |
| "eval_steps": 800, | |
| "global_step": 4300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0, | |
| "loss": 2.4801, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0, | |
| "loss": 2.4284, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0, | |
| "loss": 2.2651, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0, | |
| "loss": 2.411, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0, | |
| "loss": 2.8299, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0, | |
| "loss": 2.2188, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.3082402064781276e-06, | |
| "loss": 1.345, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.9623603097171917e-06, | |
| "loss": 0.5695, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.262883767531511e-06, | |
| "loss": 0.8812, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.5555756797431724e-06, | |
| "loss": 0.8725, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.7786547836457785e-06, | |
| "loss": 0.4851, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.9589528137043157e-06, | |
| "loss": 0.8354, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.110267503805303e-06, | |
| "loss": 0.4873, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.2406394020168525e-06, | |
| "loss": 0.6584, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.3551671365864186e-06, | |
| "loss": 0.529, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.4572878450621517e-06, | |
| "loss": 0.5812, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.5494288615482305e-06, | |
| "loss": 0.5163, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.6333682331099297e-06, | |
| "loss": 0.6595, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.710447450306277e-06, | |
| "loss": 0.4775, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.7817036126729157e-06, | |
| "loss": 0.682, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.84795507876713e-06, | |
| "loss": 0.7048, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.909858960648549e-06, | |
| "loss": 0.9478, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.9679508875196075e-06, | |
| "loss": 0.5932, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.022673220704539e-06, | |
| "loss": 0.5733, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.074395524884577e-06, | |
| "loss": 0.5817, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.123429713794031e-06, | |
| "loss": 0.6372, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.170041450985754e-06, | |
| "loss": 0.5108, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.214458864668026e-06, | |
| "loss": 0.6262, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.256879301905398e-06, | |
| "loss": 0.8594, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.297474628787183e-06, | |
| "loss": 0.7241, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.336395436735046e-06, | |
| "loss": 0.6471, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.373774415149143e-06, | |
| "loss": 0.6144, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.409729081127459e-06, | |
| "loss": 0.672, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.444364007946065e-06, | |
| "loss": 0.6802, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.4777726588457195e-06, | |
| "loss": 0.514, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.510038907149524e-06, | |
| "loss": 0.715, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.541238304971202e-06, | |
| "loss": 0.5189, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.5714391488166745e-06, | |
| "loss": 0.7188, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.600703379889684e-06, | |
| "loss": 0.5829, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.629087348946707e-06, | |
| "loss": 0.5551, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.656642469442713e-06, | |
| "loss": 0.7016, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.683415777991895e-06, | |
| "loss": 0.5357, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.709450417491796e-06, | |
| "loss": 0.6232, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.734786055373451e-06, | |
| "loss": 0.7218, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.759459247158257e-06, | |
| "loss": 0.51, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.783503753685794e-06, | |
| "loss": 0.8871, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.806950818921448e-06, | |
| "loss": 0.8177, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8298294140798465e-06, | |
| "loss": 0.5602, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.852166452849314e-06, | |
| "loss": 0.7395, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8739869817278244e-06, | |
| "loss": 0.7008, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.89531434884623e-06, | |
| "loss": 0.7096, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.916170354132174e-06, | |
| "loss": 0.7617, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.936575383236021e-06, | |
| "loss": 0.637, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.956548527281403e-06, | |
| "loss": 0.7149, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.976107690203556e-06, | |
| "loss": 0.585, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.995269685187989e-06, | |
| "loss": 0.4153, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.998404594767071e-06, | |
| "loss": 0.8215, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.995213784301213e-06, | |
| "loss": 0.6051, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.992022973835355e-06, | |
| "loss": 0.6785, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.988832163369496e-06, | |
| "loss": 0.6774, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.985641352903638e-06, | |
| "loss": 0.5039, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.982450542437779e-06, | |
| "loss": 0.5724, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.979259731971921e-06, | |
| "loss": 0.8469, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.976068921506063e-06, | |
| "loss": 0.9735, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.972878111040205e-06, | |
| "loss": 0.8594, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.969687300574346e-06, | |
| "loss": 0.6523, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9664964901084875e-06, | |
| "loss": 0.6641, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.96330567964263e-06, | |
| "loss": 0.6586, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.960114869176771e-06, | |
| "loss": 0.6019, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.956924058710913e-06, | |
| "loss": 0.7117, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.953733248245054e-06, | |
| "loss": 0.4632, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.950542437779196e-06, | |
| "loss": 0.733, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.947351627313338e-06, | |
| "loss": 0.7205, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9441608168474795e-06, | |
| "loss": 0.6121, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.940970006381621e-06, | |
| "loss": 0.7859, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9377791959157625e-06, | |
| "loss": 0.8041, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.934588385449905e-06, | |
| "loss": 0.7834, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.931397574984046e-06, | |
| "loss": 0.6704, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.928206764518188e-06, | |
| "loss": 0.8402, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.925015954052329e-06, | |
| "loss": 0.7851, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9218251435864715e-06, | |
| "loss": 0.501, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.918634333120613e-06, | |
| "loss": 0.6039, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9154435226547544e-06, | |
| "loss": 0.64, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.912252712188896e-06, | |
| "loss": 0.5726, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.909061901723038e-06, | |
| "loss": 0.6605, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.90587109125718e-06, | |
| "loss": 0.8105, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.902680280791321e-06, | |
| "loss": 0.8422, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8994894703254635e-06, | |
| "loss": 0.5242, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.896298659859605e-06, | |
| "loss": 0.6062, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.8931078493937464e-06, | |
| "loss": 0.7289, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.889917038927888e-06, | |
| "loss": 0.6916, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.88672622846203e-06, | |
| "loss": 0.8526, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.883535417996172e-06, | |
| "loss": 1.0668, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.880344607530313e-06, | |
| "loss": 0.6912, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.877153797064455e-06, | |
| "loss": 0.7383, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.873962986598597e-06, | |
| "loss": 0.77, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.870772176132738e-06, | |
| "loss": 0.8328, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.86758136566688e-06, | |
| "loss": 0.7135, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.864390555201021e-06, | |
| "loss": 0.7976, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.861199744735164e-06, | |
| "loss": 0.5799, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.858008934269305e-06, | |
| "loss": 0.5246, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.854818123803447e-06, | |
| "loss": 0.5895, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.851627313337588e-06, | |
| "loss": 0.7751, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.84843650287173e-06, | |
| "loss": 0.7469, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.845245692405872e-06, | |
| "loss": 0.5013, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.842054881940013e-06, | |
| "loss": 0.5398, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.838864071474155e-06, | |
| "loss": 0.4547, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.835673261008296e-06, | |
| "loss": 0.8732, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.832482450542439e-06, | |
| "loss": 0.7671, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.82929164007658e-06, | |
| "loss": 0.6574, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8261008296107215e-06, | |
| "loss": 0.7173, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.822910019144863e-06, | |
| "loss": 0.4371, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.819719208679005e-06, | |
| "loss": 0.6992, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.816528398213147e-06, | |
| "loss": 0.6827, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.813337587747288e-06, | |
| "loss": 0.4919, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.81014677728143e-06, | |
| "loss": 0.9571, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.806955966815571e-06, | |
| "loss": 0.5202, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8037651563497135e-06, | |
| "loss": 0.7919, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.800574345883855e-06, | |
| "loss": 0.5517, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.7973835354179965e-06, | |
| "loss": 0.3889, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.794192724952138e-06, | |
| "loss": 0.5933, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.79100191448628e-06, | |
| "loss": 0.9298, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.787811104020422e-06, | |
| "loss": 0.4758, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.784620293554563e-06, | |
| "loss": 0.5162, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.781429483088705e-06, | |
| "loss": 0.6675, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.778238672622846e-06, | |
| "loss": 0.8493, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.7750478621569885e-06, | |
| "loss": 0.6583, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.77185705169113e-06, | |
| "loss": 0.4897, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.768666241225271e-06, | |
| "loss": 0.6633, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.765475430759413e-06, | |
| "loss": 0.782, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.762284620293555e-06, | |
| "loss": 0.815, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.759093809827697e-06, | |
| "loss": 0.4498, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.755902999361838e-06, | |
| "loss": 0.6006, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.75271218889598e-06, | |
| "loss": 0.9473, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.749521378430121e-06, | |
| "loss": 0.4036, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.746330567964263e-06, | |
| "loss": 0.555, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.743139757498405e-06, | |
| "loss": 0.7843, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.739948947032546e-06, | |
| "loss": 0.8376, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.736758136566688e-06, | |
| "loss": 0.5423, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.73356732610083e-06, | |
| "loss": 0.5533, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.7303765156349716e-06, | |
| "loss": 0.5212, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.727185705169113e-06, | |
| "loss": 0.8054, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.7239948947032545e-06, | |
| "loss": 0.438, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.720804084237397e-06, | |
| "loss": 0.6025, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.717613273771538e-06, | |
| "loss": 0.8118, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.71442246330568e-06, | |
| "loss": 0.6911, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.711231652839821e-06, | |
| "loss": 0.7022, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.7080408423739636e-06, | |
| "loss": 0.5918, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.704850031908105e-06, | |
| "loss": 0.6012, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.7016592214422465e-06, | |
| "loss": 0.8031, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.698468410976389e-06, | |
| "loss": 0.7864, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.69527760051053e-06, | |
| "loss": 0.6361, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.692086790044672e-06, | |
| "loss": 0.6619, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.688895979578813e-06, | |
| "loss": 0.5132, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.6857051691129555e-06, | |
| "loss": 0.6111, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.682514358647097e-06, | |
| "loss": 0.7884, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.6793235481812385e-06, | |
| "loss": 0.4355, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.67613273771538e-06, | |
| "loss": 0.7325, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.672941927249522e-06, | |
| "loss": 0.5633, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.669751116783664e-06, | |
| "loss": 0.6415, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.666560306317805e-06, | |
| "loss": 0.6508, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.663369495851947e-06, | |
| "loss": 0.5909, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.660178685386089e-06, | |
| "loss": 0.5651, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.6569878749202305e-06, | |
| "loss": 0.6729, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.653797064454372e-06, | |
| "loss": 0.842, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.650606253988513e-06, | |
| "loss": 0.5844, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.647415443522656e-06, | |
| "loss": 0.7394, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.644224633056797e-06, | |
| "loss": 0.6725, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.641033822590939e-06, | |
| "loss": 0.6416, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.63784301212508e-06, | |
| "loss": 0.7926, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.634652201659222e-06, | |
| "loss": 0.5941, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.631461391193364e-06, | |
| "loss": 0.9582, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.628270580727505e-06, | |
| "loss": 0.4289, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.625079770261647e-06, | |
| "loss": 0.6518, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.621888959795788e-06, | |
| "loss": 0.8722, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.618698149329931e-06, | |
| "loss": 0.5419, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.615507338864072e-06, | |
| "loss": 0.6891, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.612316528398214e-06, | |
| "loss": 0.5157, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.609125717932355e-06, | |
| "loss": 0.7015, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.6059349074664965e-06, | |
| "loss": 0.546, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.602744097000639e-06, | |
| "loss": 0.6735, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.59955328653478e-06, | |
| "loss": 0.5564, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.596362476068922e-06, | |
| "loss": 0.5182, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.593171665603063e-06, | |
| "loss": 0.4053, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.5899808551372056e-06, | |
| "loss": 0.4039, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.586790044671347e-06, | |
| "loss": 0.6502, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.5835992342054885e-06, | |
| "loss": 0.8062, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.58040842373963e-06, | |
| "loss": 0.4143, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.5772176132737715e-06, | |
| "loss": 0.5539, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.574026802807914e-06, | |
| "loss": 0.5926, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.570835992342055e-06, | |
| "loss": 0.751, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.567645181876197e-06, | |
| "loss": 0.5886, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.564454371410338e-06, | |
| "loss": 0.677, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.5612635609444805e-06, | |
| "loss": 0.7097, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.558072750478622e-06, | |
| "loss": 0.56, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.5548819400127634e-06, | |
| "loss": 0.4481, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.551691129546905e-06, | |
| "loss": 0.4959, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.548500319081046e-06, | |
| "loss": 0.8399, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.545309508615189e-06, | |
| "loss": 0.6904, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.54211869814933e-06, | |
| "loss": 0.8689, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.538927887683472e-06, | |
| "loss": 0.6232, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.535737077217613e-06, | |
| "loss": 0.6428, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.5325462667517554e-06, | |
| "loss": 0.7462, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.529355456285897e-06, | |
| "loss": 0.529, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.526164645820038e-06, | |
| "loss": 0.4875, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.52297383535418e-06, | |
| "loss": 0.6747, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.519783024888322e-06, | |
| "loss": 0.7061, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.516592214422464e-06, | |
| "loss": 0.7865, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.513401403956605e-06, | |
| "loss": 0.5122, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.5102105934907466e-06, | |
| "loss": 0.4014, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.507019783024889e-06, | |
| "loss": 0.7509, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.50382897255903e-06, | |
| "loss": 0.8073, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.500638162093172e-06, | |
| "loss": 0.3459, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.497447351627314e-06, | |
| "loss": 0.6814, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.494256541161456e-06, | |
| "loss": 1.1027, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.491065730695597e-06, | |
| "loss": 0.5254, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.4878749202297385e-06, | |
| "loss": 0.7436, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.484684109763881e-06, | |
| "loss": 0.4877, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.481493299298022e-06, | |
| "loss": 0.657, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.478302488832164e-06, | |
| "loss": 0.7193, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.475111678366305e-06, | |
| "loss": 0.5461, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.471920867900448e-06, | |
| "loss": 0.5707, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.468730057434589e-06, | |
| "loss": 0.9755, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.4655392469687305e-06, | |
| "loss": 0.551, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.462348436502872e-06, | |
| "loss": 0.499, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.459157626037014e-06, | |
| "loss": 0.4268, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.455966815571156e-06, | |
| "loss": 0.6658, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.452776005105297e-06, | |
| "loss": 0.5642, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.449585194639439e-06, | |
| "loss": 0.6943, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.446394384173581e-06, | |
| "loss": 0.5404, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.4432035737077225e-06, | |
| "loss": 0.7934, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.440012763241864e-06, | |
| "loss": 0.7138, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.4368219527760055e-06, | |
| "loss": 0.5249, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.433631142310147e-06, | |
| "loss": 0.9614, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.430440331844289e-06, | |
| "loss": 0.5915, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.427249521378431e-06, | |
| "loss": 0.6766, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.424058710912572e-06, | |
| "loss": 0.6641, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.420867900446714e-06, | |
| "loss": 0.4849, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.417677089980856e-06, | |
| "loss": 0.7182, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.4144862795149974e-06, | |
| "loss": 0.6782, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.411295469049139e-06, | |
| "loss": 0.4837, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.40810465858328e-06, | |
| "loss": 0.7323, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.404913848117422e-06, | |
| "loss": 0.5807, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.401723037651564e-06, | |
| "loss": 0.373, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.398532227185706e-06, | |
| "loss": 0.5072, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.395341416719847e-06, | |
| "loss": 0.5952, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.392150606253989e-06, | |
| "loss": 0.549, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.388959795788131e-06, | |
| "loss": 0.5918, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.385768985322272e-06, | |
| "loss": 0.4411, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.382578174856414e-06, | |
| "loss": 0.7001, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.379387364390555e-06, | |
| "loss": 0.744, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.376196553924697e-06, | |
| "loss": 0.4091, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.373005743458839e-06, | |
| "loss": 0.7464, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.3698149329929806e-06, | |
| "loss": 0.6164, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.366624122527122e-06, | |
| "loss": 0.6213, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.3634333120612635e-06, | |
| "loss": 0.6991, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.360242501595406e-06, | |
| "loss": 0.5268, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.357051691129547e-06, | |
| "loss": 0.7768, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.353860880663689e-06, | |
| "loss": 0.9204, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.35067007019783e-06, | |
| "loss": 0.5844, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.347479259731972e-06, | |
| "loss": 0.5198, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.344288449266114e-06, | |
| "loss": 0.3069, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.3410976388002555e-06, | |
| "loss": 0.5465, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.337906828334397e-06, | |
| "loss": 0.4729, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.3347160178685384e-06, | |
| "loss": 0.6514, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.331525207402681e-06, | |
| "loss": 0.8142, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.328334396936822e-06, | |
| "loss": 0.6477, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.325143586470964e-06, | |
| "loss": 0.4601, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.321952776005105e-06, | |
| "loss": 0.6687, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.3187619655392475e-06, | |
| "loss": 0.4565, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.315571155073389e-06, | |
| "loss": 0.646, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.31238034460753e-06, | |
| "loss": 0.6145, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.309189534141672e-06, | |
| "loss": 0.3854, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.305998723675814e-06, | |
| "loss": 0.6016, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.302807913209956e-06, | |
| "loss": 0.5223, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.299617102744097e-06, | |
| "loss": 0.6356, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.2964262922782395e-06, | |
| "loss": 0.4599, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.293235481812381e-06, | |
| "loss": 0.6452, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.290044671346522e-06, | |
| "loss": 0.386, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.286853860880664e-06, | |
| "loss": 0.6384, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.283663050414806e-06, | |
| "loss": 0.7654, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.280472239948948e-06, | |
| "loss": 0.6019, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.277281429483089e-06, | |
| "loss": 0.6078, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.274090619017231e-06, | |
| "loss": 0.5181, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.270899808551373e-06, | |
| "loss": 0.6731, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.267708998085514e-06, | |
| "loss": 0.4956, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.264518187619656e-06, | |
| "loss": 0.6115, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.261327377153797e-06, | |
| "loss": 0.7712, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.25813656668794e-06, | |
| "loss": 0.5086, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.254945756222081e-06, | |
| "loss": 0.7241, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.251754945756223e-06, | |
| "loss": 0.5275, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.248564135290364e-06, | |
| "loss": 0.7552, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.245373324824506e-06, | |
| "loss": 0.4292, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.242182514358648e-06, | |
| "loss": 0.7575, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.238991703892789e-06, | |
| "loss": 0.5653, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.235800893426931e-06, | |
| "loss": 0.6882, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.232610082961072e-06, | |
| "loss": 0.6488, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.2294192724952146e-06, | |
| "loss": 0.5522, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.226228462029356e-06, | |
| "loss": 0.578, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.2230376515634975e-06, | |
| "loss": 0.7412, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.219846841097639e-06, | |
| "loss": 0.5138, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.216656030631781e-06, | |
| "loss": 0.6943, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.213465220165923e-06, | |
| "loss": 0.4599, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.210274409700064e-06, | |
| "loss": 0.8815, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.207083599234206e-06, | |
| "loss": 0.6245, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.203892788768347e-06, | |
| "loss": 0.5513, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.2007019783024895e-06, | |
| "loss": 0.4635, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.197511167836631e-06, | |
| "loss": 0.5711, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.1943203573707724e-06, | |
| "loss": 0.5078, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.191129546904914e-06, | |
| "loss": 0.4304, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.187938736439056e-06, | |
| "loss": 0.715, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.184747925973198e-06, | |
| "loss": 0.6305, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.181557115507339e-06, | |
| "loss": 0.6243, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.178366305041481e-06, | |
| "loss": 0.6439, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.175175494575622e-06, | |
| "loss": 0.4782, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.171984684109764e-06, | |
| "loss": 0.4523, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.168793873643906e-06, | |
| "loss": 0.4884, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.165603063178047e-06, | |
| "loss": 0.3461, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.162412252712189e-06, | |
| "loss": 0.2459, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.159221442246331e-06, | |
| "loss": 0.8138, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.156030631780473e-06, | |
| "loss": 0.6026, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.152839821314614e-06, | |
| "loss": 0.5463, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.1496490108487556e-06, | |
| "loss": 0.4317, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.146458200382897e-06, | |
| "loss": 0.6244, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.143267389917039e-06, | |
| "loss": 0.554, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.140076579451181e-06, | |
| "loss": 0.6441, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.136885768985322e-06, | |
| "loss": 0.6233, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.133694958519464e-06, | |
| "loss": 0.5561, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.130504148053606e-06, | |
| "loss": 0.7524, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.1273133375877475e-06, | |
| "loss": 0.4338, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.124122527121889e-06, | |
| "loss": 0.4495, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.1209317166560305e-06, | |
| "loss": 0.5139, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.117740906190173e-06, | |
| "loss": 0.6545, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.114550095724314e-06, | |
| "loss": 0.5588, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.111359285258456e-06, | |
| "loss": 0.609, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.108168474792597e-06, | |
| "loss": 0.553, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.1049776643267395e-06, | |
| "loss": 0.5844, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.101786853860881e-06, | |
| "loss": 0.5779, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.0985960433950225e-06, | |
| "loss": 0.4207, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.095405232929165e-06, | |
| "loss": 0.4617, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.092214422463306e-06, | |
| "loss": 0.6092, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.089023611997448e-06, | |
| "loss": 0.4607, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.085832801531589e-06, | |
| "loss": 0.4239, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.0826419910657315e-06, | |
| "loss": 0.5438, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.079451180599873e-06, | |
| "loss": 0.5006, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.0762603701340144e-06, | |
| "loss": 0.6889, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.073069559668156e-06, | |
| "loss": 0.5742, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.069878749202298e-06, | |
| "loss": 0.8366, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.06668793873644e-06, | |
| "loss": 0.5182, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.063497128270581e-06, | |
| "loss": 0.4807, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.060306317804723e-06, | |
| "loss": 0.3995, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.057115507338865e-06, | |
| "loss": 0.5958, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.0539246968730064e-06, | |
| "loss": 0.4855, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.050733886407148e-06, | |
| "loss": 0.5908, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.047543075941289e-06, | |
| "loss": 0.7867, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.044352265475432e-06, | |
| "loss": 0.7617, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.041161455009573e-06, | |
| "loss": 0.4752, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.037970644543715e-06, | |
| "loss": 0.4732, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.034779834077856e-06, | |
| "loss": 0.635, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.0315890236119976e-06, | |
| "loss": 0.4924, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.02839821314614e-06, | |
| "loss": 0.4416, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.025207402680281e-06, | |
| "loss": 0.4448, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.022016592214423e-06, | |
| "loss": 0.7631, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.018825781748564e-06, | |
| "loss": 0.5035, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.015634971282707e-06, | |
| "loss": 0.3779, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.012444160816848e-06, | |
| "loss": 0.4924, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.0092533503509896e-06, | |
| "loss": 0.3932, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.006062539885131e-06, | |
| "loss": 0.6974, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.0028717294192725e-06, | |
| "loss": 0.7347, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.999680918953415e-06, | |
| "loss": 0.5564, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.996490108487556e-06, | |
| "loss": 0.4424, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.993299298021698e-06, | |
| "loss": 0.5323, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.990108487555839e-06, | |
| "loss": 0.6138, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.9869176770899815e-06, | |
| "loss": 0.5156, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.983726866624123e-06, | |
| "loss": 0.282, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.9805360561582645e-06, | |
| "loss": 0.5392, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.977345245692406e-06, | |
| "loss": 0.5721, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.974154435226547e-06, | |
| "loss": 0.6967, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.97096362476069e-06, | |
| "loss": 0.5348, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.967772814294831e-06, | |
| "loss": 0.6884, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.964582003828973e-06, | |
| "loss": 0.5065, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.961391193363114e-06, | |
| "loss": 0.4505, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.9582003828972565e-06, | |
| "loss": 0.6881, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.955009572431398e-06, | |
| "loss": 0.5952, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.951818761965539e-06, | |
| "loss": 0.5656, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.948627951499681e-06, | |
| "loss": 0.6437, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.945437141033822e-06, | |
| "loss": 0.5179, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.942246330567965e-06, | |
| "loss": 0.5278, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.939055520102106e-06, | |
| "loss": 0.6951, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.935864709636248e-06, | |
| "loss": 0.5468, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.932673899170389e-06, | |
| "loss": 0.5132, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.929483088704531e-06, | |
| "loss": 0.6297, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.926292278238673e-06, | |
| "loss": 0.5472, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.923101467772814e-06, | |
| "loss": 0.6623, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.919910657306956e-06, | |
| "loss": 0.6216, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.916719846841098e-06, | |
| "loss": 0.5332, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.91352903637524e-06, | |
| "loss": 0.4792, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.910338225909381e-06, | |
| "loss": 0.4573, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.9071474154435225e-06, | |
| "loss": 0.5135, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.903956604977665e-06, | |
| "loss": 0.7619, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.900765794511806e-06, | |
| "loss": 0.6681, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.897574984045948e-06, | |
| "loss": 0.7789, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.89438417358009e-06, | |
| "loss": 0.6078, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.8911933631142316e-06, | |
| "loss": 0.4812, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.888002552648373e-06, | |
| "loss": 0.5893, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.8848117421825145e-06, | |
| "loss": 0.4775, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.881620931716657e-06, | |
| "loss": 0.5012, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.878430121250798e-06, | |
| "loss": 0.4752, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.87523931078494e-06, | |
| "loss": 0.4365, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.872048500319081e-06, | |
| "loss": 0.6722, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.8688576898532236e-06, | |
| "loss": 0.6083, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.865666879387365e-06, | |
| "loss": 0.4533, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.8624760689215065e-06, | |
| "loss": 0.5879, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.859285258455648e-06, | |
| "loss": 0.6564, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.85609444798979e-06, | |
| "loss": 0.5475, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.852903637523932e-06, | |
| "loss": 0.5018, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.849712827058073e-06, | |
| "loss": 0.4544, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.846522016592215e-06, | |
| "loss": 0.6603, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.843331206126357e-06, | |
| "loss": 0.6887, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.8401403956604985e-06, | |
| "loss": 0.7819, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.83694958519464e-06, | |
| "loss": 0.5052, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.833758774728781e-06, | |
| "loss": 0.6689, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.830567964262923e-06, | |
| "loss": 0.5564, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.827377153797065e-06, | |
| "loss": 0.3658, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.824186343331207e-06, | |
| "loss": 0.6376, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.820995532865348e-06, | |
| "loss": 0.5681, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.81780472239949e-06, | |
| "loss": 0.5974, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.814613911933632e-06, | |
| "loss": 0.5623, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.8114231014677734e-06, | |
| "loss": 0.6437, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.808232291001915e-06, | |
| "loss": 0.6442, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.8050414805360563e-06, | |
| "loss": 0.4729, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.801850670070198e-06, | |
| "loss": 0.3677, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.79865985960434e-06, | |
| "loss": 0.4295, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.7954690491384816e-06, | |
| "loss": 0.6049, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.792278238672623e-06, | |
| "loss": 0.6363, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.7890874282067645e-06, | |
| "loss": 0.5939, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.785896617740907e-06, | |
| "loss": 0.5011, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.7827058072750483e-06, | |
| "loss": 0.5177, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.77951499680919e-06, | |
| "loss": 0.7722, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.7763241863433313e-06, | |
| "loss": 0.5204, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.773133375877473e-06, | |
| "loss": 0.455, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.769942565411615e-06, | |
| "loss": 0.5397, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.7667517549457565e-06, | |
| "loss": 0.5528, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.763560944479898e-06, | |
| "loss": 0.5286, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.76037013401404e-06, | |
| "loss": 0.5475, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7571793235481818e-06, | |
| "loss": 0.3887, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7539885130823233e-06, | |
| "loss": 0.6288, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7507977026164647e-06, | |
| "loss": 0.5563, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7476068921506066e-06, | |
| "loss": 0.6103, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.744416081684748e-06, | |
| "loss": 0.4141, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.74122527121889e-06, | |
| "loss": 0.4075, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.738034460753032e-06, | |
| "loss": 0.3594, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.7348436502871733e-06, | |
| "loss": 0.5157, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.731652839821315e-06, | |
| "loss": 0.4918, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.7284620293554563e-06, | |
| "loss": 0.4456, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.7252712188895986e-06, | |
| "loss": 0.7768, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.72208040842374e-06, | |
| "loss": 0.7511, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.7188895979578815e-06, | |
| "loss": 0.4604, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.715698787492023e-06, | |
| "loss": 0.6048, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.7125079770261653e-06, | |
| "loss": 0.6261, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.709317166560307e-06, | |
| "loss": 0.7588, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.7061263560944483e-06, | |
| "loss": 0.6608, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.7029355456285897e-06, | |
| "loss": 0.5453, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.6997447351627312e-06, | |
| "loss": 0.4361, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.6965539246968735e-06, | |
| "loss": 0.5557, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.693363114231015e-06, | |
| "loss": 0.6371, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.6901723037651565e-06, | |
| "loss": 0.4953, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.686981493299298e-06, | |
| "loss": 0.4157, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.6837906828334403e-06, | |
| "loss": 0.5469, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.6805998723675817e-06, | |
| "loss": 0.4933, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.677409061901723e-06, | |
| "loss": 0.4994, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.6742182514358647e-06, | |
| "loss": 0.3726, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.6710274409700066e-06, | |
| "loss": 0.5413, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.6678366305041485e-06, | |
| "loss": 0.574, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.66464582003829e-06, | |
| "loss": 0.2569, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.6614550095724314e-06, | |
| "loss": 0.5012, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.6582641991065733e-06, | |
| "loss": 0.586, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.655073388640715e-06, | |
| "loss": 0.4588, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.6518825781748567e-06, | |
| "loss": 0.3745, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.6486917677089985e-06, | |
| "loss": 0.5444, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.64550095724314e-06, | |
| "loss": 0.5545, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.6423101467772815e-06, | |
| "loss": 0.6965, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.6391193363114234e-06, | |
| "loss": 0.4442, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.6359285258455653e-06, | |
| "loss": 0.4866, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.6327377153797067e-06, | |
| "loss": 0.5114, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.6295469049138482e-06, | |
| "loss": 0.5922, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.62635609444799e-06, | |
| "loss": 0.4787, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.623165283982132e-06, | |
| "loss": 0.6709, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.6199744735162735e-06, | |
| "loss": 0.5078, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.616783663050415e-06, | |
| "loss": 0.5999, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.6135928525845564e-06, | |
| "loss": 0.5051, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.6111997447351634e-06, | |
| "loss": 0.4373, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.608008934269305e-06, | |
| "loss": 0.7497, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.6048181238034463e-06, | |
| "loss": 0.458, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.6016273133375878e-06, | |
| "loss": 0.3981, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.59843650287173e-06, | |
| "loss": 0.4995, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.5952456924058716e-06, | |
| "loss": 0.493, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.592054881940013e-06, | |
| "loss": 0.462, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.5888640714741545e-06, | |
| "loss": 0.5239, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.585673261008296e-06, | |
| "loss": 0.4376, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.5824824505424383e-06, | |
| "loss": 0.6067, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.5792916400765798e-06, | |
| "loss": 0.4091, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.5761008296107212e-06, | |
| "loss": 0.5261, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.5729100191448627e-06, | |
| "loss": 0.5408, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.569719208679005e-06, | |
| "loss": 0.5867, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.5665283982131465e-06, | |
| "loss": 0.636, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.563337587747288e-06, | |
| "loss": 0.4329, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.56014677728143e-06, | |
| "loss": 0.7026, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.5569559668155713e-06, | |
| "loss": 0.5245, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.553765156349713e-06, | |
| "loss": 0.4929, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.5505743458838547e-06, | |
| "loss": 0.4876, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.5473835354179966e-06, | |
| "loss": 0.45, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.544192724952138e-06, | |
| "loss": 0.5068, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.54100191448628e-06, | |
| "loss": 0.5647, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.5378111040204214e-06, | |
| "loss": 0.5048, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.5346202935545633e-06, | |
| "loss": 0.457, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.5314294830887048e-06, | |
| "loss": 0.4089, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.5282386726228462e-06, | |
| "loss": 0.3521, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.5250478621569886e-06, | |
| "loss": 0.3477, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.52185705169113e-06, | |
| "loss": 0.6625, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.5186662412252715e-06, | |
| "loss": 0.3829, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.515475430759413e-06, | |
| "loss": 0.4733, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.5122846202935553e-06, | |
| "loss": 0.4024, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.5090938098276968e-06, | |
| "loss": 0.5733, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.5059029993618382e-06, | |
| "loss": 0.5788, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.5027121888959797e-06, | |
| "loss": 0.4806, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.499521378430121e-06, | |
| "loss": 0.5091, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.4963305679642635e-06, | |
| "loss": 0.6465, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.493139757498405e-06, | |
| "loss": 0.4821, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.4899489470325464e-06, | |
| "loss": 0.3563, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.486758136566688e-06, | |
| "loss": 0.7174, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.4835673261008302e-06, | |
| "loss": 0.3833, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.4803765156349717e-06, | |
| "loss": 0.6688, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.477185705169113e-06, | |
| "loss": 0.5733, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.4739948947032546e-06, | |
| "loss": 0.5743, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.4708040842373965e-06, | |
| "loss": 0.5219, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.4676132737715384e-06, | |
| "loss": 0.5964, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.46442246330568e-06, | |
| "loss": 0.5981, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.4612316528398214e-06, | |
| "loss": 0.4068, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.4580408423739632e-06, | |
| "loss": 0.3966, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.454850031908105e-06, | |
| "loss": 0.2291, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.4516592214422466e-06, | |
| "loss": 0.4695, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.4484684109763885e-06, | |
| "loss": 0.5594, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.44527760051053e-06, | |
| "loss": 0.5603, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.4420867900446714e-06, | |
| "loss": 0.4934, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.4388959795788133e-06, | |
| "loss": 0.6316, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.4357051691129552e-06, | |
| "loss": 0.3424, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.4325143586470967e-06, | |
| "loss": 0.566, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.429323548181238e-06, | |
| "loss": 0.3565, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.42613273771538e-06, | |
| "loss": 0.5191, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.422941927249522e-06, | |
| "loss": 0.3848, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.4197511167836634e-06, | |
| "loss": 0.6962, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.416560306317805e-06, | |
| "loss": 0.3646, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.4133694958519464e-06, | |
| "loss": 0.3756, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.4101786853860887e-06, | |
| "loss": 0.2853, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.40698787492023e-06, | |
| "loss": 0.2925, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.4037970644543716e-06, | |
| "loss": 0.3838, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.400606253988513e-06, | |
| "loss": 0.4479, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.3974154435226554e-06, | |
| "loss": 0.5207, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.394224633056797e-06, | |
| "loss": 0.3813, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.3910338225909384e-06, | |
| "loss": 0.4028, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.38784301212508e-06, | |
| "loss": 0.3406, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.3846522016592213e-06, | |
| "loss": 0.4495, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.3814613911933636e-06, | |
| "loss": 0.5411, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.378270580727505e-06, | |
| "loss": 0.3533, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.3750797702616465e-06, | |
| "loss": 0.5577, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.371888959795788e-06, | |
| "loss": 0.4198, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.3686981493299303e-06, | |
| "loss": 0.2956, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.365507338864072e-06, | |
| "loss": 0.5714, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.3623165283982133e-06, | |
| "loss": 0.3393, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.359125717932355e-06, | |
| "loss": 0.3448, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.3559349074664966e-06, | |
| "loss": 0.4956, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.3527440970006385e-06, | |
| "loss": 0.4609, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.34955328653478e-06, | |
| "loss": 0.4499, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.346362476068922e-06, | |
| "loss": 0.3638, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.3431716656030634e-06, | |
| "loss": 0.6062, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.3399808551372053e-06, | |
| "loss": 0.319, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.3367900446713467e-06, | |
| "loss": 0.3106, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.3335992342054886e-06, | |
| "loss": 0.6715, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.33040842373963e-06, | |
| "loss": 0.4007, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.3272176132737716e-06, | |
| "loss": 0.5854, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.324026802807914e-06, | |
| "loss": 0.4384, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.3208359923420554e-06, | |
| "loss": 0.5186, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.317645181876197e-06, | |
| "loss": 0.2793, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.3144543714103383e-06, | |
| "loss": 0.3945, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.3112635609444806e-06, | |
| "loss": 0.4488, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.308072750478622e-06, | |
| "loss": 0.2692, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.3048819400127635e-06, | |
| "loss": 0.4689, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.301691129546905e-06, | |
| "loss": 0.2162, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.2985003190810465e-06, | |
| "loss": 0.3339, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.295309508615189e-06, | |
| "loss": 0.5855, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.2921186981493303e-06, | |
| "loss": 0.4823, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.2889278876834717e-06, | |
| "loss": 0.3587, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.2857370772176132e-06, | |
| "loss": 0.3903, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.2825462667517555e-06, | |
| "loss": 0.5669, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.279355456285897e-06, | |
| "loss": 0.413, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.2761646458200385e-06, | |
| "loss": 0.3735, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.27297383535418e-06, | |
| "loss": 0.5467, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.269783024888322e-06, | |
| "loss": 0.3738, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.2665922144224637e-06, | |
| "loss": 0.4619, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.263401403956605e-06, | |
| "loss": 0.3739, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.2602105934907467e-06, | |
| "loss": 0.4065, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.2570197830248886e-06, | |
| "loss": 0.3406, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.2538289725590305e-06, | |
| "loss": 0.4554, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.250638162093172e-06, | |
| "loss": 0.799, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.247447351627314e-06, | |
| "loss": 0.4552, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.2442565411614553e-06, | |
| "loss": 0.3708, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.2410657306955968e-06, | |
| "loss": 0.3309, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.2378749202297387e-06, | |
| "loss": 0.4065, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.2346841097638806e-06, | |
| "loss": 0.627, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.231493299298022e-06, | |
| "loss": 0.3551, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.2283024888321635e-06, | |
| "loss": 0.2465, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.2251116783663054e-06, | |
| "loss": 0.5104, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.2219208679004473e-06, | |
| "loss": 0.5923, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.2187300574345887e-06, | |
| "loss": 0.3455, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.2155392469687302e-06, | |
| "loss": 0.3767, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.2123484365028717e-06, | |
| "loss": 0.4728, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.209157626037014e-06, | |
| "loss": 0.4427, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.2059668155711555e-06, | |
| "loss": 0.3805, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.202776005105297e-06, | |
| "loss": 0.2417, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.1995851946394384e-06, | |
| "loss": 0.4459, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.1963943841735807e-06, | |
| "loss": 0.5951, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.193203573707722e-06, | |
| "loss": 0.4512, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.1900127632418637e-06, | |
| "loss": 0.4038, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.186821952776005e-06, | |
| "loss": 0.5716, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.1836311423101466e-06, | |
| "loss": 0.3122, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.180440331844289e-06, | |
| "loss": 0.6523, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.1772495213784304e-06, | |
| "loss": 0.4129, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.174058710912572e-06, | |
| "loss": 0.7674, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.1708679004467138e-06, | |
| "loss": 0.5227, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.1676770899808557e-06, | |
| "loss": 0.457, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.164486279514997e-06, | |
| "loss": 0.3279, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.1612954690491386e-06, | |
| "loss": 0.4809, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.1581046585832805e-06, | |
| "loss": 0.3513, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.154913848117422e-06, | |
| "loss": 0.5097, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.151723037651564e-06, | |
| "loss": 0.4727, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.1485322271857053e-06, | |
| "loss": 0.3848, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.1453414167198472e-06, | |
| "loss": 0.49, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.1421506062539887e-06, | |
| "loss": 0.4166, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.1389597957881306e-06, | |
| "loss": 0.4397, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.135768985322272e-06, | |
| "loss": 0.4295, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.132578174856414e-06, | |
| "loss": 0.3807, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.1293873643905554e-06, | |
| "loss": 0.5155, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.126196553924697e-06, | |
| "loss": 0.4183, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.123005743458839e-06, | |
| "loss": 0.4173, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.1198149329929807e-06, | |
| "loss": 0.5842, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.116624122527122e-06, | |
| "loss": 0.6673, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.1134333120612636e-06, | |
| "loss": 0.4492, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.110242501595406e-06, | |
| "loss": 0.4747, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.1070516911295474e-06, | |
| "loss": 0.5746, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.103860880663689e-06, | |
| "loss": 0.5708, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.1006700701978303e-06, | |
| "loss": 0.601, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.097479259731972e-06, | |
| "loss": 0.7205, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.094288449266114e-06, | |
| "loss": 0.4023, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.0910976388002556e-06, | |
| "loss": 0.5012, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.087906828334397e-06, | |
| "loss": 0.4779, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.0847160178685385e-06, | |
| "loss": 0.5062, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.081525207402681e-06, | |
| "loss": 0.2888, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.0783343969368223e-06, | |
| "loss": 0.4121, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.075143586470964e-06, | |
| "loss": 0.4721, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.0719527760051053e-06, | |
| "loss": 0.3422, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.068761965539247e-06, | |
| "loss": 0.5018, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.065571155073389e-06, | |
| "loss": 0.5165, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.0623803446075305e-06, | |
| "loss": 0.3574, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.059189534141672e-06, | |
| "loss": 0.579, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.055998723675814e-06, | |
| "loss": 0.4961, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.0528079132099558e-06, | |
| "loss": 0.3949, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.0496171027440973e-06, | |
| "loss": 0.3999, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.046426292278239e-06, | |
| "loss": 0.524, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.0432354818123806e-06, | |
| "loss": 0.3688, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.040044671346522e-06, | |
| "loss": 0.403, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.036853860880664e-06, | |
| "loss": 0.3388, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.033663050414806e-06, | |
| "loss": 0.4706, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.0304722399489473e-06, | |
| "loss": 0.6817, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.027281429483089e-06, | |
| "loss": 0.3896, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.0240906190172307e-06, | |
| "loss": 0.358, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.0208998085513726e-06, | |
| "loss": 0.3115, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.017708998085514e-06, | |
| "loss": 0.5322, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.0145181876196555e-06, | |
| "loss": 0.4613, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.011327377153797e-06, | |
| "loss": 0.4374, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.0081365666879393e-06, | |
| "loss": 0.4775, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.004945756222081e-06, | |
| "loss": 0.349, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.0017549457562223e-06, | |
| "loss": 0.5114, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.9985641352903637e-06, | |
| "loss": 0.3901, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.995373324824506e-06, | |
| "loss": 0.4756, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.9921825143586475e-06, | |
| "loss": 0.4669, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.988991703892789e-06, | |
| "loss": 0.5554, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.9858008934269305e-06, | |
| "loss": 0.3345, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.982610082961072e-06, | |
| "loss": 0.3653, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.9794192724952143e-06, | |
| "loss": 0.4543, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.9762284620293557e-06, | |
| "loss": 0.382, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.973037651563497e-06, | |
| "loss": 0.2821, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.969846841097639e-06, | |
| "loss": 0.4392, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.966656030631781e-06, | |
| "loss": 0.3785, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.9634652201659224e-06, | |
| "loss": 0.4799, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.960274409700064e-06, | |
| "loss": 0.4004, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.957083599234206e-06, | |
| "loss": 0.4598, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.9538927887683473e-06, | |
| "loss": 0.6889, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.950701978302489e-06, | |
| "loss": 0.3401, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.9475111678366306e-06, | |
| "loss": 0.5162, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.9443203573707725e-06, | |
| "loss": 0.3811, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.941129546904914e-06, | |
| "loss": 0.3048, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.937938736439056e-06, | |
| "loss": 0.5528, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.9347479259731974e-06, | |
| "loss": 0.3721, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.9315571155073393e-06, | |
| "loss": 0.4877, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.9283663050414807e-06, | |
| "loss": 0.3101, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.925175494575622e-06, | |
| "loss": 0.3458, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.9219846841097645e-06, | |
| "loss": 0.3741, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.918793873643906e-06, | |
| "loss": 0.7428, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.9156030631780475e-06, | |
| "loss": 0.3487, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.912412252712189e-06, | |
| "loss": 0.3184, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.9092214422463313e-06, | |
| "loss": 0.3778, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.9060306317804727e-06, | |
| "loss": 0.4085, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.902839821314614e-06, | |
| "loss": 0.4859, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.8996490108487557e-06, | |
| "loss": 0.5783, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.896458200382897e-06, | |
| "loss": 0.2396, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.8932673899170395e-06, | |
| "loss": 0.6563, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.890076579451181e-06, | |
| "loss": 0.3048, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.8868857689853224e-06, | |
| "loss": 0.3935, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.883694958519464e-06, | |
| "loss": 0.5063, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.880504148053606e-06, | |
| "loss": 0.5056, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.8773133375877476e-06, | |
| "loss": 0.2203, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.874122527121889e-06, | |
| "loss": 0.3044, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.8709317166560306e-06, | |
| "loss": 0.4444, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.8677409061901725e-06, | |
| "loss": 0.3645, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.8645500957243144e-06, | |
| "loss": 0.4594, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.861359285258456e-06, | |
| "loss": 0.4897, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.8581684747925973e-06, | |
| "loss": 0.4772, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.854977664326739e-06, | |
| "loss": 0.388, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.851786853860881e-06, | |
| "loss": 0.3869, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.8485960433950226e-06, | |
| "loss": 0.4853, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.8454052329291645e-06, | |
| "loss": 0.4467, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.842214422463306e-06, | |
| "loss": 0.2356, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.8390236119974474e-06, | |
| "loss": 0.4614, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.8358328015315893e-06, | |
| "loss": 0.3212, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.832641991065731e-06, | |
| "loss": 0.5037, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.8294511805998727e-06, | |
| "loss": 0.4957, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.826260370134014e-06, | |
| "loss": 0.418, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.823069559668156e-06, | |
| "loss": 0.2996, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.819878749202298e-06, | |
| "loss": 0.5421, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.8166879387364394e-06, | |
| "loss": 0.5049, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.813497128270581e-06, | |
| "loss": 0.3929, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.8103063178047223e-06, | |
| "loss": 0.4045, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.8071155073388646e-06, | |
| "loss": 0.3494, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.803924696873006e-06, | |
| "loss": 0.3782, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.8007338864071476e-06, | |
| "loss": 0.2768, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.797543075941289e-06, | |
| "loss": 0.531, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.7943522654754314e-06, | |
| "loss": 0.4958, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.791161455009573e-06, | |
| "loss": 0.6183, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.7879706445437143e-06, | |
| "loss": 0.3521, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.7847798340778558e-06, | |
| "loss": 0.4406, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.7815890236119973e-06, | |
| "loss": 0.4131, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.7783982131461396e-06, | |
| "loss": 0.5107, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.775207402680281e-06, | |
| "loss": 0.2735, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.7720165922144225e-06, | |
| "loss": 0.3788, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.7696234843650286e-06, | |
| "loss": 0.5599, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.7664326738991705e-06, | |
| "loss": 0.2355, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.7632418634333124e-06, | |
| "loss": 0.5358, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.760051052967454e-06, | |
| "loss": 0.3283, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.7568602425015958e-06, | |
| "loss": 0.4093, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.7536694320357372e-06, | |
| "loss": 0.287, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.750478621569879e-06, | |
| "loss": 0.5271, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.7472878111040206e-06, | |
| "loss": 0.3372, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.7440970006381625e-06, | |
| "loss": 0.5649, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.740906190172304e-06, | |
| "loss": 0.5017, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.7377153797064454e-06, | |
| "loss": 0.6057, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.7345245692405873e-06, | |
| "loss": 0.4184, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.7313337587747292e-06, | |
| "loss": 0.2892, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.7281429483088707e-06, | |
| "loss": 0.5914, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.724952137843012e-06, | |
| "loss": 0.472, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.721761327377154e-06, | |
| "loss": 0.3773, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.718570516911296e-06, | |
| "loss": 0.2942, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.7153797064454374e-06, | |
| "loss": 0.3445, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.712188895979579e-06, | |
| "loss": 0.2773, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.7089980855137204e-06, | |
| "loss": 0.4007, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.7058072750478627e-06, | |
| "loss": 0.3083, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.702616464582004e-06, | |
| "loss": 0.4782, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.6994256541161456e-06, | |
| "loss": 0.5419, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.696234843650287e-06, | |
| "loss": 0.5713, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.6930440331844294e-06, | |
| "loss": 0.3722, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.689853222718571e-06, | |
| "loss": 0.4663, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.6866624122527123e-06, | |
| "loss": 0.3208, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.683471601786854e-06, | |
| "loss": 0.351, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.6802807913209957e-06, | |
| "loss": 0.513, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.6770899808551376e-06, | |
| "loss": 0.4409, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.673899170389279e-06, | |
| "loss": 0.3335, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.6707083599234205e-06, | |
| "loss": 0.3706, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.6675175494575624e-06, | |
| "loss": 0.4404, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.6643267389917043e-06, | |
| "loss": 0.4186, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.661135928525846e-06, | |
| "loss": 0.3666, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.6579451180599873e-06, | |
| "loss": 0.3849, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.654754307594129e-06, | |
| "loss": 0.4564, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.6515634971282706e-06, | |
| "loss": 0.3534, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.6483726866624125e-06, | |
| "loss": 0.3735, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.645181876196554e-06, | |
| "loss": 0.4449, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.641991065730696e-06, | |
| "loss": 0.5032, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.6388002552648374e-06, | |
| "loss": 0.3677, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.6356094447989793e-06, | |
| "loss": 0.5004, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.632418634333121e-06, | |
| "loss": 0.1972, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.6292278238672626e-06, | |
| "loss": 0.4606, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.626037013401404e-06, | |
| "loss": 0.3533, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.6228462029355456e-06, | |
| "loss": 0.3607, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.619655392469688e-06, | |
| "loss": 0.5767, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.6164645820038293e-06, | |
| "loss": 0.5316, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.613273771537971e-06, | |
| "loss": 0.2474, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.6100829610721123e-06, | |
| "loss": 0.3168, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.6068921506062546e-06, | |
| "loss": 0.4029, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.603701340140396e-06, | |
| "loss": 0.2693, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.6005105296745375e-06, | |
| "loss": 0.3756, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.597319719208679e-06, | |
| "loss": 0.3712, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.5941289087428205e-06, | |
| "loss": 0.366, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.590938098276963e-06, | |
| "loss": 0.3813, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.5877472878111043e-06, | |
| "loss": 0.4442, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.5845564773452457e-06, | |
| "loss": 0.4061, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.581365666879387e-06, | |
| "loss": 0.3679, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.5781748564135295e-06, | |
| "loss": 0.2641, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.574984045947671e-06, | |
| "loss": 0.5656, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.5717932354818125e-06, | |
| "loss": 0.3672, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.568602425015954e-06, | |
| "loss": 0.3395, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.565411614550096e-06, | |
| "loss": 0.5946, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.5622208040842377e-06, | |
| "loss": 0.3526, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.559029993618379e-06, | |
| "loss": 0.3365, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.555839183152521e-06, | |
| "loss": 0.4003, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.5526483726866626e-06, | |
| "loss": 0.3994, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.5494575622208045e-06, | |
| "loss": 0.3623, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.546266751754946e-06, | |
| "loss": 0.5994, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.543075941289088e-06, | |
| "loss": 0.3717, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.5398851308232293e-06, | |
| "loss": 0.2424, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.5366943203573708e-06, | |
| "loss": 0.5083, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.5335035098915127e-06, | |
| "loss": 0.2865, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.5303126994256545e-06, | |
| "loss": 0.2184, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.527121888959796e-06, | |
| "loss": 0.5697, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.5239310784939375e-06, | |
| "loss": 0.3524, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.5207402680280794e-06, | |
| "loss": 0.3922, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.5175494575622213e-06, | |
| "loss": 0.3364, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.5143586470963627e-06, | |
| "loss": 0.3983, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.5111678366305042e-06, | |
| "loss": 0.3812, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.5079770261646457e-06, | |
| "loss": 0.3001, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.504786215698788e-06, | |
| "loss": 0.3159, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.5015954052329295e-06, | |
| "loss": 0.316, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.498404594767071e-06, | |
| "loss": 0.2347, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.495213784301213e-06, | |
| "loss": 0.4728, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.4920229738353543e-06, | |
| "loss": 0.4451, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.488832163369496e-06, | |
| "loss": 0.6876, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.4856413529036377e-06, | |
| "loss": 0.4799, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.482450542437779e-06, | |
| "loss": 0.3912, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.479259731971921e-06, | |
| "loss": 0.2295, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.4760689215060625e-06, | |
| "loss": 0.2529, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.4728781110402044e-06, | |
| "loss": 0.454, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.469687300574346e-06, | |
| "loss": 0.3894, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.4664964901084878e-06, | |
| "loss": 0.2908, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.4633056796426292e-06, | |
| "loss": 0.499, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.460114869176771e-06, | |
| "loss": 0.3336, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.4569240587109126e-06, | |
| "loss": 0.3016, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.4537332482450545e-06, | |
| "loss": 0.4519, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.450542437779196e-06, | |
| "loss": 0.2589, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.447351627313338e-06, | |
| "loss": 0.404, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.4441608168474793e-06, | |
| "loss": 0.335, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.4409700063816212e-06, | |
| "loss": 0.4312, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.437779195915763e-06, | |
| "loss": 0.2877, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.4345883854499046e-06, | |
| "loss": 0.3591, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.4313975749840465e-06, | |
| "loss": 0.3149, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.428206764518188e-06, | |
| "loss": 0.3785, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.4250159540523294e-06, | |
| "loss": 0.3654, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.4218251435864713e-06, | |
| "loss": 0.2894, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.4186343331206128e-06, | |
| "loss": 0.5198, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.4154435226547547e-06, | |
| "loss": 0.4666, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.412252712188896e-06, | |
| "loss": 0.3899, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.409061901723038e-06, | |
| "loss": 0.4248, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.4058710912571795e-06, | |
| "loss": 0.3144, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.4026802807913214e-06, | |
| "loss": 0.3294, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.399489470325463e-06, | |
| "loss": 0.3395, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3962986598596043e-06, | |
| "loss": 0.4384, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3931078493937462e-06, | |
| "loss": 0.3029, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3899170389278877e-06, | |
| "loss": 0.3868, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.3867262284620296e-06, | |
| "loss": 0.233, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.383535417996171e-06, | |
| "loss": 0.4025, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.380344607530313e-06, | |
| "loss": 0.2714, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.3771537970644544e-06, | |
| "loss": 0.4694, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.3739629865985963e-06, | |
| "loss": 0.3092, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.370772176132738e-06, | |
| "loss": 0.3375, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.3675813656668793e-06, | |
| "loss": 0.2356, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.364390555201021e-06, | |
| "loss": 0.4403, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.3611997447351626e-06, | |
| "loss": 0.4015, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.3580089342693045e-06, | |
| "loss": 0.5201, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.3548181238034464e-06, | |
| "loss": 0.4203, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.351627313337588e-06, | |
| "loss": 0.4869, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.3484365028717298e-06, | |
| "loss": 0.3923, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.3452456924058712e-06, | |
| "loss": 0.6743, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.342054881940013e-06, | |
| "loss": 0.2588, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.3388640714741546e-06, | |
| "loss": 0.323, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.3356732610082965e-06, | |
| "loss": 0.2859, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.332482450542438e-06, | |
| "loss": 0.2747, | |
| "step": 3572 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.32929164007658e-06, | |
| "loss": 0.2221, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.3261008296107213e-06, | |
| "loss": 0.3744, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.3229100191448632e-06, | |
| "loss": 0.3965, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.3197192086790047e-06, | |
| "loss": 0.4889, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.3165283982131466e-06, | |
| "loss": 0.4218, | |
| "step": 3592 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.313337587747288e-06, | |
| "loss": 0.3016, | |
| "step": 3596 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.3101467772814295e-06, | |
| "loss": 0.3408, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.3069559668155714e-06, | |
| "loss": 0.387, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.303765156349713e-06, | |
| "loss": 0.3845, | |
| "step": 3608 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.300574345883855e-06, | |
| "loss": 0.2885, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2973835354179963e-06, | |
| "loss": 0.1871, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.294192724952138e-06, | |
| "loss": 0.3516, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2910019144862796e-06, | |
| "loss": 0.4165, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2878111040204215e-06, | |
| "loss": 0.2891, | |
| "step": 3628 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.284620293554563e-06, | |
| "loss": 0.3616, | |
| "step": 3632 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.2814294830887045e-06, | |
| "loss": 0.4057, | |
| "step": 3636 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.2782386726228464e-06, | |
| "loss": 0.5166, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.275047862156988e-06, | |
| "loss": 0.3279, | |
| "step": 3644 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.2718570516911297e-06, | |
| "loss": 0.3537, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.268666241225271e-06, | |
| "loss": 0.3187, | |
| "step": 3652 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.265475430759413e-06, | |
| "loss": 0.4043, | |
| "step": 3656 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.2622846202935546e-06, | |
| "loss": 0.2799, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.2590938098276964e-06, | |
| "loss": 0.3363, | |
| "step": 3664 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.255902999361838e-06, | |
| "loss": 0.6477, | |
| "step": 3668 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.25271218889598e-06, | |
| "loss": 0.4967, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.2495213784301213e-06, | |
| "loss": 0.4474, | |
| "step": 3676 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.246330567964263e-06, | |
| "loss": 0.2501, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.2431397574984046e-06, | |
| "loss": 0.3448, | |
| "step": 3684 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.2399489470325465e-06, | |
| "loss": 0.3084, | |
| "step": 3688 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.2367581365666884e-06, | |
| "loss": 0.3165, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.23356732610083e-06, | |
| "loss": 0.405, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.230376515634972e-06, | |
| "loss": 0.3648, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.2271857051691133e-06, | |
| "loss": 0.2938, | |
| "step": 3704 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.2239948947032547e-06, | |
| "loss": 0.336, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.2208040842373966e-06, | |
| "loss": 0.4741, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.217613273771538e-06, | |
| "loss": 0.4006, | |
| "step": 3716 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.21442246330568e-06, | |
| "loss": 0.3443, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.2112316528398215e-06, | |
| "loss": 0.2771, | |
| "step": 3724 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.2080408423739634e-06, | |
| "loss": 0.2515, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.204850031908105e-06, | |
| "loss": 0.3897, | |
| "step": 3732 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.2016592214422467e-06, | |
| "loss": 0.182, | |
| "step": 3736 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.198468410976388e-06, | |
| "loss": 0.3575, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1952776005105297e-06, | |
| "loss": 0.3662, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1920867900446716e-06, | |
| "loss": 0.4394, | |
| "step": 3748 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.188895979578813e-06, | |
| "loss": 0.3541, | |
| "step": 3752 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.185705169112955e-06, | |
| "loss": 0.3837, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1825143586470964e-06, | |
| "loss": 0.2765, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.1793235481812383e-06, | |
| "loss": 0.3349, | |
| "step": 3764 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.1761327377153797e-06, | |
| "loss": 0.3141, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.1729419272495216e-06, | |
| "loss": 0.3836, | |
| "step": 3772 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.169751116783663e-06, | |
| "loss": 0.417, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.1665603063178046e-06, | |
| "loss": 0.339, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.1633694958519465e-06, | |
| "loss": 0.4287, | |
| "step": 3784 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.1601786853860884e-06, | |
| "loss": 0.3423, | |
| "step": 3788 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.15698787492023e-06, | |
| "loss": 0.3367, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.1537970644543717e-06, | |
| "loss": 0.2519, | |
| "step": 3796 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.150606253988513e-06, | |
| "loss": 0.3884, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.147415443522655e-06, | |
| "loss": 0.2767, | |
| "step": 3804 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.1442246330567966e-06, | |
| "loss": 0.3162, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.1410338225909385e-06, | |
| "loss": 0.3722, | |
| "step": 3812 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.13784301212508e-06, | |
| "loss": 0.462, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.134652201659222e-06, | |
| "loss": 0.4508, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.1314613911933633e-06, | |
| "loss": 0.309, | |
| "step": 3824 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.128270580727505e-06, | |
| "loss": 0.4566, | |
| "step": 3828 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.1250797702616467e-06, | |
| "loss": 0.3216, | |
| "step": 3832 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.1218889597957886e-06, | |
| "loss": 0.4669, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.11869814932993e-06, | |
| "loss": 0.4764, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.115507338864072e-06, | |
| "loss": 0.3011, | |
| "step": 3844 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.1123165283982134e-06, | |
| "loss": 0.3308, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.109125717932355e-06, | |
| "loss": 0.4038, | |
| "step": 3852 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.1059349074664967e-06, | |
| "loss": 0.2768, | |
| "step": 3856 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.1027440970006382e-06, | |
| "loss": 0.374, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.09955328653478e-06, | |
| "loss": 0.3393, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0963624760689216e-06, | |
| "loss": 0.3846, | |
| "step": 3868 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0931716656030635e-06, | |
| "loss": 0.308, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.089980855137205e-06, | |
| "loss": 0.4816, | |
| "step": 3876 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.086790044671347e-06, | |
| "loss": 0.2121, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0835992342054883e-06, | |
| "loss": 0.3698, | |
| "step": 3884 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0804084237396298e-06, | |
| "loss": 0.3615, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.0772176132737717e-06, | |
| "loss": 0.2294, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.074026802807913e-06, | |
| "loss": 0.2515, | |
| "step": 3896 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.070835992342055e-06, | |
| "loss": 0.3559, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.0676451818761965e-06, | |
| "loss": 0.4243, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.0644543714103384e-06, | |
| "loss": 0.3622, | |
| "step": 3908 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.06126356094448e-06, | |
| "loss": 0.5588, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0580727504786218e-06, | |
| "loss": 0.2169, | |
| "step": 3916 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0548819400127632e-06, | |
| "loss": 0.4732, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.051691129546905e-06, | |
| "loss": 0.2331, | |
| "step": 3924 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0485003190810466e-06, | |
| "loss": 0.3388, | |
| "step": 3928 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0453095086151885e-06, | |
| "loss": 0.4545, | |
| "step": 3932 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.04211869814933e-06, | |
| "loss": 0.3886, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.038927887683472e-06, | |
| "loss": 0.2233, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.0357370772176138e-06, | |
| "loss": 0.3658, | |
| "step": 3944 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.0325462667517552e-06, | |
| "loss": 0.3229, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.029355456285897e-06, | |
| "loss": 0.1759, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.0261646458200386e-06, | |
| "loss": 0.3737, | |
| "step": 3956 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.02297383535418e-06, | |
| "loss": 0.3362, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.019783024888322e-06, | |
| "loss": 0.2873, | |
| "step": 3964 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.0165922144224634e-06, | |
| "loss": 0.3454, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.0134014039566053e-06, | |
| "loss": 0.3428, | |
| "step": 3972 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.0102105934907468e-06, | |
| "loss": 0.4089, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.0070197830248887e-06, | |
| "loss": 0.3472, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.00382897255903e-06, | |
| "loss": 0.2868, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.000638162093172e-06, | |
| "loss": 0.3088, | |
| "step": 3988 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9974473516273135e-06, | |
| "loss": 0.2471, | |
| "step": 3992 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.994256541161455e-06, | |
| "loss": 0.2816, | |
| "step": 3996 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.991065730695597e-06, | |
| "loss": 0.3135, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9878749202297383e-06, | |
| "loss": 0.379, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9846841097638802e-06, | |
| "loss": 0.5225, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9814932992980217e-06, | |
| "loss": 0.3229, | |
| "step": 4012 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9783024888321636e-06, | |
| "loss": 0.3573, | |
| "step": 4016 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.975111678366305e-06, | |
| "loss": 0.2219, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.971920867900447e-06, | |
| "loss": 0.2133, | |
| "step": 4024 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.9687300574345884e-06, | |
| "loss": 0.4303, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.96553924696873e-06, | |
| "loss": 0.4735, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.9631461391193364e-06, | |
| "loss": 0.3223, | |
| "step": 4036 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.959955328653478e-06, | |
| "loss": 0.3124, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.95676451818762e-06, | |
| "loss": 0.4547, | |
| "step": 4044 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.9535737077217613e-06, | |
| "loss": 0.3089, | |
| "step": 4048 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.950382897255903e-06, | |
| "loss": 0.344, | |
| "step": 4052 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.947192086790045e-06, | |
| "loss": 0.1488, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.9440012763241865e-06, | |
| "loss": 0.4715, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.9408104658583284e-06, | |
| "loss": 0.2866, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.93761965539247e-06, | |
| "loss": 0.3207, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.9344288449266118e-06, | |
| "loss": 0.3532, | |
| "step": 4072 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.9312380344607532e-06, | |
| "loss": 0.3416, | |
| "step": 4076 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.928047223994895e-06, | |
| "loss": 0.6239, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.9248564135290366e-06, | |
| "loss": 0.1806, | |
| "step": 4084 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.9216656030631785e-06, | |
| "loss": 0.3065, | |
| "step": 4088 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.91847479259732e-06, | |
| "loss": 0.2393, | |
| "step": 4092 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.9152839821314614e-06, | |
| "loss": 0.4581, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.9120931716656033e-06, | |
| "loss": 0.2407, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.908902361199745e-06, | |
| "loss": 0.3328, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.9057115507338867e-06, | |
| "loss": 0.2898, | |
| "step": 4108 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.9025207402680282e-06, | |
| "loss": 0.5888, | |
| "step": 4112 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.89932992980217e-06, | |
| "loss": 0.3909, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8961391193363115e-06, | |
| "loss": 0.2613, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8929483088704534e-06, | |
| "loss": 0.2594, | |
| "step": 4124 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.889757498404595e-06, | |
| "loss": 0.3601, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8865666879387366e-06, | |
| "loss": 0.1791, | |
| "step": 4132 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8833758774728783e-06, | |
| "loss": 0.3714, | |
| "step": 4136 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.88018506700702e-06, | |
| "loss": 0.3601, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8769942565411616e-06, | |
| "loss": 0.4697, | |
| "step": 4144 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8738034460753033e-06, | |
| "loss": 0.4277, | |
| "step": 4148 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.870612635609445e-06, | |
| "loss": 0.4183, | |
| "step": 4152 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.8674218251435867e-06, | |
| "loss": 0.2764, | |
| "step": 4156 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.8642310146777281e-06, | |
| "loss": 0.3209, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.86104020421187e-06, | |
| "loss": 0.328, | |
| "step": 4164 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.8578493937460115e-06, | |
| "loss": 0.3673, | |
| "step": 4168 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8546585832801534e-06, | |
| "loss": 0.2856, | |
| "step": 4172 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8514677728142949e-06, | |
| "loss": 0.4248, | |
| "step": 4176 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8482769623484368e-06, | |
| "loss": 0.419, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8450861518825782e-06, | |
| "loss": 0.3315, | |
| "step": 4184 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8418953414167201e-06, | |
| "loss": 0.3508, | |
| "step": 4188 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8387045309508616e-06, | |
| "loss": 0.2016, | |
| "step": 4192 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.8355137204850033e-06, | |
| "loss": 0.2352, | |
| "step": 4196 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.832322910019145e-06, | |
| "loss": 0.4638, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.8291320995532866e-06, | |
| "loss": 0.4352, | |
| "step": 4204 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.8259412890874283e-06, | |
| "loss": 0.4832, | |
| "step": 4208 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.82275047862157e-06, | |
| "loss": 0.295, | |
| "step": 4212 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8195596681557117e-06, | |
| "loss": 0.3176, | |
| "step": 4216 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8163688576898534e-06, | |
| "loss": 0.0922, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.813178047223995e-06, | |
| "loss": 0.2375, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8099872367581367e-06, | |
| "loss": 0.3374, | |
| "step": 4228 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8067964262922782e-06, | |
| "loss": 0.2551, | |
| "step": 4232 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.80360561582642e-06, | |
| "loss": 0.3228, | |
| "step": 4236 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.8004148053605616e-06, | |
| "loss": 0.3102, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7972239948947035e-06, | |
| "loss": 0.2471, | |
| "step": 4244 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.794033184428845e-06, | |
| "loss": 0.285, | |
| "step": 4248 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7908423739629868e-06, | |
| "loss": 0.3468, | |
| "step": 4252 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7876515634971283e-06, | |
| "loss": 0.2877, | |
| "step": 4256 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7844607530312702e-06, | |
| "loss": 0.4362, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7812699425654117e-06, | |
| "loss": 0.1789, | |
| "step": 4264 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7780791320995533e-06, | |
| "loss": 0.3056, | |
| "step": 4268 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.774888321633695e-06, | |
| "loss": 0.478, | |
| "step": 4272 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7716975111678367e-06, | |
| "loss": 0.3405, | |
| "step": 4276 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.7685067007019786e-06, | |
| "loss": 0.2038, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.76531589023612e-06, | |
| "loss": 0.2301, | |
| "step": 4284 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.762125079770262e-06, | |
| "loss": 0.3283, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.7589342693044034e-06, | |
| "loss": 0.1711, | |
| "step": 4292 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.7557434588385453e-06, | |
| "loss": 0.241, | |
| "step": 4296 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.7525526483726868e-06, | |
| "loss": 0.2408, | |
| "step": 4300 | |
| } | |
| ], | |
| "logging_steps": 4, | |
| "max_steps": 6468, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "total_flos": 43550404509696.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |