| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.993997599039616, | |
| "eval_steps": 500, | |
| "global_step": 1248, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.024009603841536616, | |
| "grad_norm": 20.687761793962967, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 3.041, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04801920768307323, | |
| "grad_norm": 9.794234746287575, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 2.6099, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07202881152460984, | |
| "grad_norm": 6.5777988620330365, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 2.3365, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09603841536614646, | |
| "grad_norm": 6.29761081804441, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 2.2801, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12004801920768307, | |
| "grad_norm": 5.520168810289388, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 2.1745, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14405762304921968, | |
| "grad_norm": 5.123098743767706, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 2.1269, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16806722689075632, | |
| "grad_norm": 5.422361829855033, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 2.2153, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19207683073229292, | |
| "grad_norm": 5.3572698260571885, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 2.1211, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21608643457382953, | |
| "grad_norm": 5.047733772943945, | |
| "learning_rate": 7.2000000000000005e-06, | |
| "loss": 2.1216, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.24009603841536614, | |
| "grad_norm": 4.718768637017437, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 2.1262, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26410564225690275, | |
| "grad_norm": 4.919713507188908, | |
| "learning_rate": 8.8e-06, | |
| "loss": 2.2468, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.28811524609843936, | |
| "grad_norm": 4.677146498377886, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 2.1325, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.31212484993997597, | |
| "grad_norm": 5.466016358856625, | |
| "learning_rate": 9.999510882536288e-06, | |
| "loss": 2.1459, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33613445378151263, | |
| "grad_norm": 4.440556392680108, | |
| "learning_rate": 9.995598516974005e-06, | |
| "loss": 2.2798, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.36014405762304924, | |
| "grad_norm": 4.004727778029605, | |
| "learning_rate": 9.987776847469797e-06, | |
| "loss": 2.1991, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.38415366146458585, | |
| "grad_norm": 4.543742816180648, | |
| "learning_rate": 9.976051994868506e-06, | |
| "loss": 2.337, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 4.205969855939202, | |
| "learning_rate": 9.960433134449601e-06, | |
| "loss": 2.1878, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.43217286914765907, | |
| "grad_norm": 4.05967628095397, | |
| "learning_rate": 9.940932488747054e-06, | |
| "loss": 2.1711, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4561824729891957, | |
| "grad_norm": 3.851467643989681, | |
| "learning_rate": 9.917565317984614e-06, | |
| "loss": 2.2059, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4801920768307323, | |
| "grad_norm": 3.6455587183088203, | |
| "learning_rate": 9.890349908133914e-06, | |
| "loss": 2.262, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5042016806722689, | |
| "grad_norm": 3.8411593313415393, | |
| "learning_rate": 9.859307556604794e-06, | |
| "loss": 2.1945, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5282112845138055, | |
| "grad_norm": 4.236728754680901, | |
| "learning_rate": 9.824462555579019e-06, | |
| "loss": 2.2873, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5522208883553421, | |
| "grad_norm": 4.002728659906048, | |
| "learning_rate": 9.785842173000439e-06, | |
| "loss": 2.1592, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5762304921968787, | |
| "grad_norm": 3.9915206002259698, | |
| "learning_rate": 9.743476631236473e-06, | |
| "loss": 2.1277, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6002400960384153, | |
| "grad_norm": 3.7416035754534174, | |
| "learning_rate": 9.697399083427602e-06, | |
| "loss": 2.089, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6242496998799519, | |
| "grad_norm": 3.7596118293233873, | |
| "learning_rate": 9.647645587543391e-06, | |
| "loss": 2.0995, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6482593037214885, | |
| "grad_norm": 4.099393566721857, | |
| "learning_rate": 9.594255078165338e-06, | |
| "loss": 2.2086, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6722689075630253, | |
| "grad_norm": 3.75884213911114, | |
| "learning_rate": 9.537269336018627e-06, | |
| "loss": 2.0663, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6962785114045619, | |
| "grad_norm": 3.9076832551024006, | |
| "learning_rate": 9.476732955276637e-06, | |
| "loss": 2.2313, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7202881152460985, | |
| "grad_norm": 3.999571507578816, | |
| "learning_rate": 9.412693308663793e-06, | |
| "loss": 2.1956, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7442977190876351, | |
| "grad_norm": 3.673914733393223, | |
| "learning_rate": 9.345200510384044e-06, | |
| "loss": 2.1372, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7683073229291717, | |
| "grad_norm": 4.065856280538105, | |
| "learning_rate": 9.274307376904023e-06, | |
| "loss": 2.1247, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7923169267707083, | |
| "grad_norm": 3.437535576857318, | |
| "learning_rate": 9.200069385621528e-06, | |
| "loss": 2.1668, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 3.8590752772047447, | |
| "learning_rate": 9.122544631451703e-06, | |
| "loss": 2.2321, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8403361344537815, | |
| "grad_norm": 3.5809836445656886, | |
| "learning_rate": 9.041793781364898e-06, | |
| "loss": 2.0716, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8643457382953181, | |
| "grad_norm": 3.641632940529625, | |
| "learning_rate": 8.957880026911727e-06, | |
| "loss": 2.2342, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8883553421368547, | |
| "grad_norm": 3.688771072520596, | |
| "learning_rate": 8.870869034772563e-06, | |
| "loss": 2.208, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9123649459783914, | |
| "grad_norm": 4.146934930912289, | |
| "learning_rate": 8.78082889537008e-06, | |
| "loss": 2.1074, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.936374549819928, | |
| "grad_norm": 3.334175185522678, | |
| "learning_rate": 8.687830069585138e-06, | |
| "loss": 2.0091, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9603841536614646, | |
| "grad_norm": 3.830656059837796, | |
| "learning_rate": 8.591945333617622e-06, | |
| "loss": 2.248, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9843937575030012, | |
| "grad_norm": 3.9829216438871216, | |
| "learning_rate": 8.493249722035464e-06, | |
| "loss": 2.2053, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.007202881152461, | |
| "grad_norm": 3.9394668655626304, | |
| "learning_rate": 8.391820469056371e-06, | |
| "loss": 1.9717, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0312124849939976, | |
| "grad_norm": 4.550842560189379, | |
| "learning_rate": 8.287736948108197e-06, | |
| "loss": 1.413, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.0552220888355341, | |
| "grad_norm": 4.356821890648689, | |
| "learning_rate": 8.181080609715309e-06, | |
| "loss": 1.3939, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.0792316926770709, | |
| "grad_norm": 4.518076349604797, | |
| "learning_rate": 8.071934917759502e-06, | |
| "loss": 1.39, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1032412965186074, | |
| "grad_norm": 3.8536102257477878, | |
| "learning_rate": 7.960385284165364e-06, | |
| "loss": 1.4119, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.127250900360144, | |
| "grad_norm": 4.701240892617297, | |
| "learning_rate": 7.846519002061208e-06, | |
| "loss": 1.341, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.1512605042016806, | |
| "grad_norm": 3.896277166698315, | |
| "learning_rate": 7.730425177467854e-06, | |
| "loss": 1.4495, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.1752701080432173, | |
| "grad_norm": 5.214676571280148, | |
| "learning_rate": 7.612194659568755e-06, | |
| "loss": 1.3667, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.199279711884754, | |
| "grad_norm": 4.289971399955271, | |
| "learning_rate": 7.491919969615993e-06, | |
| "loss": 1.3402, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.2232893157262905, | |
| "grad_norm": 4.317059354915328, | |
| "learning_rate": 7.369695228527796e-06, | |
| "loss": 1.3434, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.247298919567827, | |
| "grad_norm": 3.939366941478397, | |
| "learning_rate": 7.245616083234266e-06, | |
| "loss": 1.392, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.2713085234093637, | |
| "grad_norm": 3.785565139610707, | |
| "learning_rate": 7.119779631828882e-06, | |
| "loss": 1.3443, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.2953181272509005, | |
| "grad_norm": 3.5905115574582815, | |
| "learning_rate": 6.992284347584438e-06, | |
| "loss": 1.3996, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.319327731092437, | |
| "grad_norm": 4.219125670543618, | |
| "learning_rate": 6.8632300018928046e-06, | |
| "loss": 1.3255, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.3433373349339737, | |
| "grad_norm": 4.213563188046468, | |
| "learning_rate": 6.732717586188866e-06, | |
| "loss": 1.3917, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.3673469387755102, | |
| "grad_norm": 4.010244839439909, | |
| "learning_rate": 6.600849232919707e-06, | |
| "loss": 1.4168, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.3913565426170469, | |
| "grad_norm": 3.929564306152493, | |
| "learning_rate": 6.467728135620892e-06, | |
| "loss": 1.2492, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.4153661464585834, | |
| "grad_norm": 3.9942636003807133, | |
| "learning_rate": 6.333458468162415e-06, | |
| "loss": 1.3267, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.43937575030012, | |
| "grad_norm": 4.943277008699724, | |
| "learning_rate": 6.198145303227456e-06, | |
| "loss": 1.3832, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.4633853541416566, | |
| "grad_norm": 4.255100090454613, | |
| "learning_rate": 6.0618945300877964e-06, | |
| "loss": 1.4676, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.4873949579831933, | |
| "grad_norm": 4.176877482554283, | |
| "learning_rate": 5.924812771740201e-06, | |
| "loss": 1.3791, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.51140456182473, | |
| "grad_norm": 4.58206991985806, | |
| "learning_rate": 5.787007301468637e-06, | |
| "loss": 1.3183, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.5354141656662665, | |
| "grad_norm": 4.095301189501832, | |
| "learning_rate": 5.648585958897585e-06, | |
| "loss": 1.3407, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.559423769507803, | |
| "grad_norm": 3.850464869839509, | |
| "learning_rate": 5.509657065602197e-06, | |
| "loss": 1.3666, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.5834333733493398, | |
| "grad_norm": 4.680569296847418, | |
| "learning_rate": 5.370329340341261e-06, | |
| "loss": 1.3968, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.6074429771908765, | |
| "grad_norm": 4.426880734817505, | |
| "learning_rate": 5.2307118139794015e-06, | |
| "loss": 1.3658, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.631452581032413, | |
| "grad_norm": 4.008404443022427, | |
| "learning_rate": 5.090913744164987e-06, | |
| "loss": 1.2817, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.6554621848739495, | |
| "grad_norm": 4.452596800536699, | |
| "learning_rate": 4.951044529830603e-06, | |
| "loss": 1.3149, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.6794717887154862, | |
| "grad_norm": 3.9870495810854423, | |
| "learning_rate": 4.811213625582961e-06, | |
| "loss": 1.3856, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.703481392557023, | |
| "grad_norm": 4.6711092636358424, | |
| "learning_rate": 4.671530456049225e-06, | |
| "loss": 1.3813, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.7274909963985594, | |
| "grad_norm": 4.184946891066517, | |
| "learning_rate": 4.532104330246807e-06, | |
| "loss": 1.3402, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.751500600240096, | |
| "grad_norm": 4.423869307360585, | |
| "learning_rate": 4.3930443560436346e-06, | |
| "loss": 1.3824, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.7755102040816326, | |
| "grad_norm": 3.9130866621429825, | |
| "learning_rate": 4.2544593547758214e-06, | |
| "loss": 1.391, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.7995198079231693, | |
| "grad_norm": 3.585626688576833, | |
| "learning_rate": 4.116457776089576e-06, | |
| "loss": 1.3607, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.8235294117647058, | |
| "grad_norm": 4.595114918315794, | |
| "learning_rate": 3.979147613073956e-06, | |
| "loss": 1.4228, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.8475390156062423, | |
| "grad_norm": 4.59294338671302, | |
| "learning_rate": 3.842636317750918e-06, | |
| "loss": 1.3064, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.871548619447779, | |
| "grad_norm": 4.315956805402431, | |
| "learning_rate": 3.707030716988783e-06, | |
| "loss": 1.3054, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.8955582232893158, | |
| "grad_norm": 4.494329679068256, | |
| "learning_rate": 3.5724369289048845e-06, | |
| "loss": 1.4475, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.9195678271308525, | |
| "grad_norm": 3.83422319487575, | |
| "learning_rate": 3.4389602798228942e-06, | |
| "loss": 1.2875, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.943577430972389, | |
| "grad_norm": 3.9598203386027264, | |
| "learning_rate": 3.3067052218497263e-06, | |
| "loss": 1.2589, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.9675870348139255, | |
| "grad_norm": 4.369625879638144, | |
| "learning_rate": 3.1757752511365903e-06, | |
| "loss": 1.2926, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.9915966386554622, | |
| "grad_norm": 4.297274070272408, | |
| "learning_rate": 3.046272826888097e-06, | |
| "loss": 1.3962, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.014405762304922, | |
| "grad_norm": 3.467032136187331, | |
| "learning_rate": 2.9182992911828585e-06, | |
| "loss": 0.8827, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.0384153661464586, | |
| "grad_norm": 5.834392828540893, | |
| "learning_rate": 2.791954789668264e-06, | |
| "loss": 0.518, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.0624249699879953, | |
| "grad_norm": 4.35808297800326, | |
| "learning_rate": 2.6673381931915466e-06, | |
| "loss": 0.5302, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.086434573829532, | |
| "grad_norm": 4.295950284412872, | |
| "learning_rate": 2.5445470204284384e-06, | |
| "loss": 0.6175, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.1104441776710683, | |
| "grad_norm": 4.498379665112538, | |
| "learning_rate": 2.4236773615699466e-06, | |
| "loss": 0.5003, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.134453781512605, | |
| "grad_norm": 4.087409019142491, | |
| "learning_rate": 2.304823803127023e-06, | |
| "loss": 0.5097, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.1584633853541417, | |
| "grad_norm": 4.141485559719574, | |
| "learning_rate": 2.1880793539119168e-06, | |
| "loss": 0.5089, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.1824729891956784, | |
| "grad_norm": 4.167475392774359, | |
| "learning_rate": 2.073535372254158e-06, | |
| "loss": 0.5004, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.2064825930372147, | |
| "grad_norm": 4.0504831735580655, | |
| "learning_rate": 1.961281494508129e-06, | |
| "loss": 0.4905, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.2304921968787514, | |
| "grad_norm": 4.582931550708554, | |
| "learning_rate": 1.8514055649081646e-06, | |
| "loss": 0.5111, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.254501800720288, | |
| "grad_norm": 5.055063219094166, | |
| "learning_rate": 1.743993566826077e-06, | |
| "loss": 0.4623, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.278511404561825, | |
| "grad_norm": 4.528043836852871, | |
| "learning_rate": 1.6391295554848957e-06, | |
| "loss": 0.4996, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.302521008403361, | |
| "grad_norm": 4.790281842406982, | |
| "learning_rate": 1.5368955921814844e-06, | |
| "loss": 0.5649, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.326530612244898, | |
| "grad_norm": 4.487294982617259, | |
| "learning_rate": 1.437371680069491e-06, | |
| "loss": 0.5021, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.3505402160864346, | |
| "grad_norm": 4.79811730303713, | |
| "learning_rate": 1.3406357015529236e-06, | |
| "loss": 0.4707, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.3745498199279713, | |
| "grad_norm": 4.331408379478664, | |
| "learning_rate": 1.2467633573392829e-06, | |
| "loss": 0.4973, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.398559423769508, | |
| "grad_norm": 4.81821270116248, | |
| "learning_rate": 1.15582810720001e-06, | |
| "loss": 0.4531, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.4225690276110443, | |
| "grad_norm": 3.840737088024985, | |
| "learning_rate": 1.0679011124845702e-06, | |
| "loss": 0.5042, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.446578631452581, | |
| "grad_norm": 5.275863790410332, | |
| "learning_rate": 9.830511804331467e-07, | |
| "loss": 0.4401, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": 4.562178525657066, | |
| "learning_rate": 9.013447103315758e-07, | |
| "loss": 0.4665, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.494597839135654, | |
| "grad_norm": 3.757433482713813, | |
| "learning_rate": 8.22845641550598e-07, | |
| "loss": 0.4568, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.5186074429771907, | |
| "grad_norm": 4.151109799062541, | |
| "learning_rate": 7.476154035101279e-07, | |
| "loss": 0.4835, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.5426170468187275, | |
| "grad_norm": 3.6159229877862367, | |
| "learning_rate": 6.757128676076813e-07, | |
| "loss": 0.434, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.566626650660264, | |
| "grad_norm": 4.49946909123513, | |
| "learning_rate": 6.071943011485837e-07, | |
| "loss": 0.4279, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.590636254501801, | |
| "grad_norm": 4.935063508764955, | |
| "learning_rate": 5.421133233140096e-07, | |
| "loss": 0.463, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.614645858343337, | |
| "grad_norm": 4.07260484094093, | |
| "learning_rate": 4.80520863201308e-07, | |
| "loss": 0.4221, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.638655462184874, | |
| "grad_norm": 5.214934170547326, | |
| "learning_rate": 4.2246511996945904e-07, | |
| "loss": 0.6136, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.6626650660264106, | |
| "grad_norm": 4.308544688046405, | |
| "learning_rate": 3.679915251208305e-07, | |
| "loss": 0.4775, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.6866746698679473, | |
| "grad_norm": 3.7974095950716293, | |
| "learning_rate": 3.17142706948782e-07, | |
| "loss": 0.428, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.710684273709484, | |
| "grad_norm": 4.256161845989194, | |
| "learning_rate": 2.6995845717889715e-07, | |
| "loss": 0.4731, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.7346938775510203, | |
| "grad_norm": 4.158934712493225, | |
| "learning_rate": 2.2647569982998942e-07, | |
| "loss": 0.4358, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.758703481392557, | |
| "grad_norm": 4.066226801856005, | |
| "learning_rate": 1.8672846231922005e-07, | |
| "loss": 0.4977, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.7827130852340938, | |
| "grad_norm": 4.944564503261002, | |
| "learning_rate": 1.5074784883395587e-07, | |
| "loss": 0.4495, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.80672268907563, | |
| "grad_norm": 4.116559563991335, | |
| "learning_rate": 1.1856201599119876e-07, | |
| "loss": 0.4488, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.8307322929171668, | |
| "grad_norm": 4.300196152639256, | |
| "learning_rate": 9.019615080363087e-08, | |
| "loss": 0.4994, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.8547418967587035, | |
| "grad_norm": 4.367567850384355, | |
| "learning_rate": 6.56724509695289e-08, | |
| "loss": 0.474, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.87875150060024, | |
| "grad_norm": 4.2390360863938215, | |
| "learning_rate": 4.501010750196322e-08, | |
| "loss": 0.4507, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.902761104441777, | |
| "grad_norm": 4.352099381645135, | |
| "learning_rate": 2.8225289710876457e-08, | |
| "loss": 0.4552, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.926770708283313, | |
| "grad_norm": 4.25941958105542, | |
| "learning_rate": 1.5331132549794014e-08, | |
| "loss": 0.4984, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.95078031212485, | |
| "grad_norm": 3.9123796575299723, | |
| "learning_rate": 6.3377263370728585e-09, | |
| "loss": 0.43, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.9747899159663866, | |
| "grad_norm": 4.3724081003161945, | |
| "learning_rate": 1.2521088597239328e-09, | |
| "loss": 0.4747, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.993997599039616, | |
| "step": 1248, | |
| "total_flos": 15664931143680.0, | |
| "train_loss": 1.354868695904047, | |
| "train_runtime": 2143.3151, | |
| "train_samples_per_second": 9.322, | |
| "train_steps_per_second": 0.582 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1248, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 15664931143680.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |